├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dependencies ├── install_protobuf-3.8.0.sh └── install_pycuda.sh ├── docs ├── results.gif ├── yolov4_custom_1.png ├── yolov4_custom_2.png └── yolov4_custom_3.png ├── launch ├── video_source.launch ├── yolov3_trt.launch ├── yolov4_trt.launch └── yolov4_trt_batch.launch ├── msg ├── BoundingBox2D.msg ├── Detector2D.msg ├── Detector2DArray.msg └── ObjectHypothesis.msg ├── package.xml ├── plugins ├── Makefile ├── README.md ├── libyolo_layer.so ├── yolo_layer.cu ├── yolo_layer.h └── yolo_layer.o ├── setup.py ├── src ├── image_converter.cpp ├── image_converter.h ├── node_video_source.cpp ├── ros_compat.cpp └── ros_compat.h ├── trt_yolo_v3.py ├── trt_yolo_v4.py ├── trt_yolo_v4_batch.py ├── utils ├── __init__.py ├── display.py ├── visualization.py ├── yolo_classes.py ├── yolo_with_plugins.py └── yolo_with_plugins_batch.py └── yolo ├── __pycache__ └── plugins.cpython-36.pyc ├── convert_yolo_trt.sh ├── download_yolo.sh ├── onnx_to_tensorrt.py ├── plugins.py ├── requirements.txt └── yolo_to_onnx.py /.gitignore: -------------------------------------------------------------------------------- 1 | yolo/yolov4-416.trt 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2) 2 | project(yolov4_trt_ros) 3 | 4 | find_package(OpenCV 3.0 REQUIRED) 5 | find_package(jetson-utils REQUIRED) 6 | find_package(jetson-inference REQUIRED) 7 | find_package(CUDA REQUIRED) 8 | 9 | catkin_python_setup() 10 | 11 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/package.xml ${CMAKE_CURRENT_SOURCE_DIR}/package.xml COPYONLY) 12 | 13 | find_package(catkin REQUIRED COMPONENTS 14 | image_transport 15 | rospy 16 | sensor_msgs 17 | vision_msgs 18 | cv_bridge 19 | message_generation 20 | ) 21 | 22 | add_message_files( 23 | FILES 24 | Detector2D.msg 25 | Detector2DArray.msg 26 | BoundingBox2D.msg 27 | ObjectHypothesis.msg 28 | ) 29 | 30 | generate_messages(DEPENDENCIES std_msgs vision_msgs) 31 | 32 | catkin_package( 33 | CATKIN_DEPENDS nodelet rospy image_transport sensor_msgs vision_msgs cv_bridge message_runtime 34 | ) 35 | 36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # enable c++11 (TensorRT requirement) 37 | include_directories(${catkin_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS}) 38 | add_definitions(-DROS1) 39 | 40 | set(common_src src/image_converter.cpp src/ros_compat.cpp) 41 | 42 | add_executable(video_source src/node_video_source.cpp ${common_src}) 43 | target_link_libraries(video_source ${catkin_LIBRARIES} jetson-inference) 44 | 45 | catkin_install_python(PROGRAMS trt_yolo_v4_batch.py 46 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 47 | ) 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Indra Kurniawan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv4 with TensorRT engine
2 |
3 | This package contains the yolov4_trt_node, which performs inference using NVIDIA's TensorRT engine.
4 |
5 | This package works with both YOLOv3 and YOLOv4. Adjust the commands below to match the YOLO model you are using.
6 |
7 |
8 | ![Video_Result2](docs/results.gif)
9 |
10 | ---
11 | ## Setting up the environment
12 |
13 | ### Install dependencies
14 |
15 | ### Current Environment:
16 |
17 | - Jetson Xavier AGX
18 | - ROS Melodic
19 | - Ubuntu 18.04
20 | - Jetpack 4.4
21 | - TensorRT 7+
22 |
23 | #### Dependencies:
24 |
25 | - OpenCV 3.x
26 | - numpy 1.15.1
27 | - Protobuf 3.8.0
28 | - Pycuda 2019.1.2
29 | - onnx 1.4.1 (depends on Protobuf)
30 |
31 | ### Install all dependencies with the commands below
32 |
33 | ```
34 | # Install pycuda (takes a while)
35 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
36 | $ ./install_pycuda.sh
37 |
38 | # Install Protobuf (takes a while)
39 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
40 | $ ./install_protobuf-3.8.0.sh
41 |
42 | # Install onnx (depends on Protobuf above)
43 | $ sudo pip3 install onnx==1.4.1
44 | ```
45 |
46 | * Please also install [jetson-inference](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
47 | * Note: This package uses nodes similar to those in the ros_deep_learning package. Place a CATKIN_IGNORE file in that package to avoid duplicate node name errors during catkin_make
48 | * If these scripts do not work for you, refer to this amazing repository by [jefflgaol](https://github.com/jefflgaol/Install-Packages-Jetson-ARM-Family) on installing the above packages and more on Jetson ARM devices.
49 | ---
50 | ## Setting up the package
51 |
52 | ### 1. Clone the project into catkin_ws and build it
53 |
54 | ```
55 | $ cd ~/catkin_ws && catkin_make
56 | $ source devel/setup.bash
57 | ```
58 |
59 | ### 2. Make libyolo_layer.so
60 |
61 | ```
62 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/plugins
63 | $ make
64 | ```
65 |
66 | This will generate a libyolo_layer.so file.
67 |
68 | ### 3. Place your yolo.weights and yolo.cfg files in the yolo folder
69 |
70 | ```
71 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/yolo
72 | ```
73 | ** Please name the weights and config files as follows:
74 | - yolov4.weights
75 | - yolov4.cfg
76 |
77 | Run the conversion script to convert the model to a TensorRT engine file
78 |
79 | ```
80 | $ ./convert_yolo_trt.sh
81 | ```
82 |
83 | - Provide the appropriate arguments
84 | - This conversion might take a while
85 | - The optimised TensorRT engine will be saved as yolov3-416.trt / yolov4-416.trt
86 |
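To confirm the conversion succeeded, you can try deserializing the engine outside of ROS first. The snippet below is only a sketch (it is not part of this package): it assumes TensorRT's Python bindings are installed and that the plugin/engine paths match the locations used above, so adjust them to your workspace.

```
# sanity-check the generated engine (illustrative only, not part of this package)
import ctypes
import os
import tensorrt as trt

# the YoloLayer_TRT plugin must be registered before the engine can be deserialized,
# which happens as a side effect of loading the shared library built in step 2
ctypes.CDLL(os.path.expanduser('~/catkin_ws/src/yolov4_trt_ros/plugins/libyolo_layer.so'))

logger = trt.Logger(trt.Logger.WARNING)
engine_file = os.path.expanduser('~/catkin_ws/src/yolov4_trt_ros/yolo/yolov4-416.trt')
with open(engine_file, 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

assert engine is not None, 'failed to deserialize the TensorRT engine'
print('bindings:', [engine.get_binding_name(i) for i in range(engine.num_bindings)])
```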
87 | ### 4. Change the class labels
88 |
89 | ```
90 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/utils
91 | $ vim yolo_classes.py
92 | ```
93 |
94 | - Change the class labels to suit your model
95 |
96 | ### 5. Change the video_input and topic_name
97 |
98 | ```
99 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/launch
100 | ```
101 |
102 | - `yolov3_trt.launch` : change the topic_name
103 |
104 | - `yolov4_trt.launch` : change the topic_name
105 |
106 | - `video_source.launch` : change the input format (refer to this [Link](https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md))
107 |
108 | * video_source.launch requires jetson-inference to be installed
109 | * Default input is CSI camera
110 |
111 | ---
112 | ## Using the package
113 |
114 | ### Running the package
115 |
116 | Note: Run the launch files separately, in different terminals
117 |
118 | ### 1. Run the video_source
119 |
120 | ```
121 | # For csi input
122 | $ roslaunch yolov4_trt_ros video_source.launch input:=csi://0
123 |
124 | # For video input
125 | $ roslaunch yolov4_trt_ros video_source.launch input:=/path_to_video/video.mp4
126 |
127 | # For USB camera
128 | $ roslaunch yolov4_trt_ros video_source.launch input:=v4l2://0
129 | ```
130 |
131 | ### 2. Run the yolo detector
132 |
133 | ```
134 | # For YOLOv3 (single input)
135 | $ roslaunch yolov4_trt_ros yolov3_trt.launch
136 |
137 | # For YOLOv4 (single input)
138 | $ roslaunch yolov4_trt_ros yolov4_trt.launch
139 |
140 | # For YOLOv4 (multiple input)
141 | $ roslaunch yolov4_trt_ros yolov4_trt_batch.launch
142 | ```
143 |
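The detector publishes its results using the custom messages defined in `msg/`: a `Detector2DArray` holding one `Detector2D` per detection, each with an `ObjectHypothesis` (id, score) and a `BoundingBox2D` (center pose plus size in pixels). A minimal sketch of a consumer node is shown below; the topic name `/detections` is a placeholder, so check the detector node / launch file for the topic it actually advertises.

```
#!/usr/bin/env python3
# minimal example consumer for the detection messages (topic name is a placeholder)
import rospy
from yolov4_trt_ros.msg import Detector2DArray


def callback(msg):
    for det in msg.detections:
        cx, cy = det.bbox.center.x, det.bbox.center.y   # box center, pixels
        w, h = det.bbox.size_x, det.bbox.size_y         # box size, pixels
        rospy.loginfo('class %d (%.2f): center=(%.0f, %.0f) size=%.0fx%.0f',
                      det.results.id, det.results.score, cx, cy, w, h)


if __name__ == '__main__':
    rospy.init_node('detection_listener')
    rospy.Subscriber('/detections', Detector2DArray, callback, queue_size=1)
    rospy.spin()
```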
144 | ### 3. For maximum performance
145 |
146 | ```
147 | $ cd /usr/bin/
148 | $ sudo ./nvpmodel -m 0 # Select the maximum-performance power mode (MAXN)
149 | $ sudo ./jetson_clocks # Maximise CPU/GPU performance
150 | ```
151 |
152 | * These commands are referenced in this [forum post](https://forums.developer.nvidia.com/t/nvpmodel-and-jetson-clocks/58659/2)
153 | * Please ensure the Jetson device is adequately cooled to prevent overheating
154 |
155 | ### Parameters
156 |
157 | - str model = "yolov3" or "yolov4"
158 | - str model_path = "/abs_path_to_model/"
159 | - int input_shape = 288/416/608
160 | - int category_num = 80
161 | - double conf_th = 0.5
162 | - bool show_img = True
163 |
164 | - Default Input FPS from CSI camera = 30.0
165 | * To change this, go to jetson-inference/utils/camera/gstCamera.cpp
166 |
167 | ```
168 | # In line 359, change this line
169 | mOptions.frameRate = 15
170 |
171 | # To the desired frame rate
172 | mOptions.frameRate = desired_frame_rate
173 | ```
174 | ---
175 | ## Results obtained
176 |
177 | ### Inference Results
178 | #### Single Camera Input
179 |
180 | | Model | Hardware | FPS | Inference Time (s) |
181 | |:---------|:--:|:---------:|:----------------:|
182 | | Yolov4-416| Xavier AGX | 40.0 | 0.025 |
183 | | Yolov4-416| Jetson Tx2 | 16.0 | 0.0625 |
184 |
185 |
186 | * Inference tests for YOLOv3/YOLOv4 and YOLOv3-tiny/YOLOv4-tiny on other Jetson devices, as well as multiple-camera-input tests, will be added in the future
187 |
188 | ### 1. Screenshots
189 |
190 | ### Video_in: .mp4 video (640x360)
191 | ### Tests Done on Xavier AGX
192 | ### Avg FPS : ~38 FPS
193 |
194 | ![Video_Result1](docs/yolov4_custom_1.png)
195 |
196 | ![Video_Result2](docs/yolov4_custom_2.png)
197 |
198 | ![Video_Result3](docs/yolov4_custom_3.png)
199 |
200 | ---
201 | ## Licenses and References
202 |
203 | ### 1. TensorRT samples from [jkjung-avt](https://github.com/jkjung-avt/)
204 |
205 | Many thanks for his project with TensorRT samples. I have referenced his source code and adapted it to ROS for robotics applications.
206 |
207 | I also used the pycuda and protobuf installation scripts from his project.
208 |
209 | That code is under the [MIT License](https://github.com/jkjung-avt/tensorrt_demos/blob/master/LICENSE)
210 |
211 | ### 2. Jetson-inference from [dusty-nv](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
212 |
213 | Many thanks for his work on Jetson Inference with ROS. I have used the video_source node from his project for capturing video inputs.
214 |
215 | That code is under the [NVIDIA License](https://github.com/dusty-nv/jetson-inference/blob/master/LICENSE.md)
--------------------------------------------------------------------------------
/dependencies/install_protobuf-3.8.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | folder=${HOME}/src
6 | mkdir -p $folder
7 |
8 | echo "** Install requirements"
9 | sudo apt-get install -y autoconf libtool
10 |
11 | echo "** Download protobuf-3.8.0 sources"
12 | cd $folder
13 | if [ ! -f protobuf-python-3.8.0.zip ]; then
14 | wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protobuf-python-3.8.0.zip
15 | fi
16 | if [ ! -f protoc-3.8.0-linux-aarch_64.zip ]; then
17 | wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protoc-3.8.0-linux-aarch_64.zip
18 | fi
19 |
20 | echo "** Install protoc"
21 | unzip protobuf-python-3.8.0.zip
22 | unzip protoc-3.8.0-linux-aarch_64.zip -d protoc-3.8.0
23 | sudo cp protoc-3.8.0/bin/protoc /usr/local/bin/protoc
24 |
25 | echo "** Build and install protobuf-3.8.0 libraries"
26 | export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
27 | cd protobuf-3.8.0/
28 | ./autogen.sh
29 | ./configure --prefix=/usr/local
30 | make -j$(nproc)
31 | make check
32 | sudo make install
33 | sudo ldconfig
34 |
35 | echo "** Update python3 protobuf module"
36 | # remove previous installation of python3 protobuf module
37 | sudo pip3 uninstall -y protobuf
38 | sudo pip3 install Cython
39 | cd python/
40 | python3 setup.py build --cpp_implementation
41 | python3 setup.py test --cpp_implementation
42 | sudo python3 setup.py install --cpp_implementation
43 |
44 | echo "** Build protobuf-3.8.0 successfully"
45 |
--------------------------------------------------------------------------------
/dependencies/install_pycuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Reference for installing 'pycuda': https://wiki.tiker.net/PyCuda/Installation/Linux/Ubuntu
4 |
5 | set -e
6 |
7 | if ! which nvcc > /dev/null; then
8 | echo "ERROR: nvcc not found"
9 | exit
10 | fi
11 |
12 | arch=$(uname -m)
13 | folder=${HOME}/src
14 | mkdir -p $folder
15 |
16 | echo "** Install requirements"
17 | sudo apt-get install -y build-essential python3-dev
18 | sudo apt-get install -y libboost-python-dev libboost-thread-dev
19 | sudo pip3 install setuptools
20 |
21 | boost_pylib=$(basename /usr/lib/${arch}-linux-gnu/libboost_python*-py3?.so)
22 | boost_pylibname=${boost_pylib%.so}
23 | boost_pyname=${boost_pylibname/lib/}
24 |
25 | echo "** Download pycuda-2019.1.2 sources"
26 | pushd $folder
27 | if [ !
-f pycuda-2019.1.2.tar.gz ]; then 28 | wget https://files.pythonhosted.org/packages/5e/3f/5658c38579b41866ba21ee1b5020b8225cec86fe717e4b1c5c972de0a33c/pycuda-2019.1.2.tar.gz 29 | fi 30 | 31 | echo "** Build and install pycuda-2019.1.2" 32 | CPU_CORES=$(nproc) 33 | echo "** cpu cores available: " $CPU_CORES 34 | tar xzvf pycuda-2019.1.2.tar.gz 35 | cd pycuda-2019.1.2 36 | python3 ./configure.py --python-exe=/usr/bin/python3 --cuda-root=/usr/local/cuda --cudadrv-lib-dir=/usr/lib/${arch}-linux-gnu --boost-inc-dir=/usr/include --boost-lib-dir=/usr/lib/${arch}-linux-gnu --boost-python-libname=${boost_pyname} --boost-thread-libname=boost_thread --no-use-shipped-boost 37 | make -j$CPU_CORES 38 | python3 setup.py build 39 | sudo python3 setup.py install 40 | 41 | popd 42 | 43 | python3 -c "import pycuda; print('pycuda version:', pycuda.VERSION)" 44 | -------------------------------------------------------------------------------- /docs/results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/results.gif -------------------------------------------------------------------------------- /docs/yolov4_custom_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_1.png -------------------------------------------------------------------------------- /docs/yolov4_custom_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_2.png -------------------------------------------------------------------------------- /docs/yolov4_custom_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_3.png -------------------------------------------------------------------------------- /launch/video_source.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /launch/yolov3_trt.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /launch/yolov4_trt.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /launch/yolov4_trt_batch.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /msg/BoundingBox2D.msg: -------------------------------------------------------------------------------- 1 | # A 2D bounding box that can be rotated about its center. 2 | # All dimensions are in pixels, but represented using floating-point 3 | # values to allow sub-pixel precision. 
If an exact pixel crop is required
4 | # for a rotated bounding box, it can be calculated using Bresenham's line
5 | # algorithm.
6 |
7 | # The 2D position (in pixels) and orientation of the bounding box center.
8 | geometry_msgs/Pose2D center
9 |
10 | # The size (in pixels) of the bounding box surrounding the object relative
11 | # to the pose of its center.
12 | float64 size_x
13 | float64 size_y
14 |
--------------------------------------------------------------------------------
/msg/Detector2D.msg:
--------------------------------------------------------------------------------
1 | # Defines a 2D detection result.
2 | #
3 | # This is similar to a 2D classification, but includes position information,
4 | # allowing a classification result for a specific crop or image point
5 | # to be located in the larger image.
6 |
7 | Header header
8 |
9 | # Class probabilities
10 | ObjectHypothesis results
11 |
12 | # 2D bounding box surrounding the object.
13 | BoundingBox2D bbox
14 |
--------------------------------------------------------------------------------
/msg/Detector2DArray.msg:
--------------------------------------------------------------------------------
1 | # A list of 2D detections, for a multi-object 2D detector.
2 |
3 | Header header
4 |
5 | # A list of the detected proposals. A multi-proposal detector might generate
6 | # this list with many candidate detections generated from a single input.
7 | Detector2D[] detections
8 |
--------------------------------------------------------------------------------
/msg/ObjectHypothesis.msg:
--------------------------------------------------------------------------------
1 | # An object hypothesis that contains no position information.
2 |
3 | # The unique numeric ID of the object detected. To get additional information about
4 | # this ID, such as its human-readable name, listeners should perform a lookup
5 | # in a metadata database. See vision_msgs/VisionInfo.msg for more detail.
6 | int64 id
7 |
8 | # The probability or confidence value of the detected object. By convention,
9 | # this value should lie in the range [0-1].
10 | float64 score 11 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | yolov4_trt_ros 4 | 0.0.0 5 | The yolov4_trt_ros package 6 | 7 | indra 8 | 9 | BSD 10 | catkin 11 | 12 | cv_bridge 13 | image_transport 14 | rospy 15 | sensor_msgs 16 | vision_msgs 17 | nodelet 18 | message_generation 19 | 20 | cv_bridge 21 | image_transport 22 | rospy 23 | sensor_msgs 24 | vision_msgs 25 | nodelet 26 | 27 | cv_bridge 28 | image_transport 29 | rospy 30 | sensor_msgs 31 | vision_msgs 32 | nodelet 33 | message_runtime 34 | 35 | -------------------------------------------------------------------------------- /plugins/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | LD=ld 3 | CXXFLAGS=-Wall -std=c++11 -g -O 4 | 5 | NVCC=nvcc 6 | 7 | cpu_arch=$(shell uname -m) 8 | ifeq ($(cpu_arch), aarch64) # Jetson 9 | chip_id=$(shell cat /sys/module/tegra_fuse/parameters/tegra_chip_id) 10 | ifeq ($(chip_id), 33) # Nano and TX1 11 | compute=53 12 | else ifeq ($(chip_id), 24) # TX2 13 | compute=62 14 | else ifeq ($(chip_id), 25) # Xavier NX 15 | compute=72 16 | else 17 | $(error Cannot determine cuda compute automatically, so please modify Makefile manually) 18 | endif 19 | else ifeq ($(cpu_arch), x86_64) # x86_64 PC 20 | $(warning "compute=75" is for GeForce RTX-2080 Ti. Please make sure CUDA compute is set correctly for your system in the Makefile.) 21 | compute=75 22 | else 23 | $(error Unkown CPU architecture: neither "aarch" nor "x86_64") 24 | endif 25 | 26 | NVCCFLAGS=-m64 -gencode arch=compute_$(compute),code=sm_$(compute) \ 27 | -gencode arch=compute_$(compute),code=compute_$(compute) 28 | 29 | # These are the directories where I installed TensorRT on my x86_64 PC. 30 | TENSORRT_INCS=-I"/usr/local/TensorRT-7.1.3.4/include" 31 | TENSORRT_LIBS=-L"/usr/local/TensorRT-7.1.3.4/lib" 32 | 33 | # INCS and LIBS 34 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin" 35 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lcudart_static -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group 36 | 37 | .PHONY: all clean 38 | 39 | all: libyolo_layer.so 40 | 41 | clean: 42 | rm -f *.so *.o 43 | 44 | libyolo_layer.so: yolo_layer.o 45 | $(CC) -shared -o $@ $< $(LIBS) 46 | 47 | yolo_layer.o: yolo_layer.cu yolo_layer.h 48 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $< 49 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | The "yolo_layer.h" and "yolo_layer.cu" were taken and modified from [wang-xinyu/tensorrtx/yolov4](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4). The original code is under [MIT License](https://github.com/wang-xinyu/tensorrtx/blob/master/LICENSE). 
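For each (grid cell, anchor) pair, the compiled plugin writes one `Detection` struct (defined in `yolo_layer.h`) into the output tensor: four box values normalised to the input size (x, y, w, h, with x/y already shifted to the top-left corner), the objectness confidence, the class id, and the class confidence, i.e. 7 floats per candidate detection. The sketch below only illustrates how such a flat output buffer can be viewed from Python; the actual post-processing lives in `utils/yolo_with_plugins.py`, and the buffer contents and grid size here are placeholders.

```
import numpy as np

# 'raw' stands for the flat float32 buffer copied back from one YOLO output binding
raw = np.zeros(13 * 13 * 3 * 7, dtype=np.float32)  # placeholder data (13x13 grid, 3 anchors)
dets = raw.reshape(-1, 7)                           # one row per (grid cell, anchor) pair
boxes = dets[:, 0:4]                                # x, y, w, h (normalised to the input size)
obj_conf, cls_id, cls_conf = dets[:, 4], dets[:, 5], dets[:, 6]
```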
2 | -------------------------------------------------------------------------------- /plugins/libyolo_layer.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/libyolo_layer.so -------------------------------------------------------------------------------- /plugins/yolo_layer.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * yolo_layer.cu 3 | * 4 | * This code was originally written by wang-xinyu under MIT license. 5 | * I took it from: 6 | * 7 | * https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4 8 | * 9 | * and made necessary modifications. 10 | * 11 | * - JK Jung 12 | */ 13 | 14 | #include "yolo_layer.h" 15 | 16 | using namespace Yolo; 17 | 18 | namespace 19 | { 20 | // Write values into buffer 21 | template 22 | void write(char*& buffer, const T& val) 23 | { 24 | *reinterpret_cast(buffer) = val; 25 | buffer += sizeof(T); 26 | } 27 | 28 | // Read values from buffer 29 | template 30 | void read(const char*& buffer, T& val) 31 | { 32 | val = *reinterpret_cast(buffer); 33 | buffer += sizeof(T); 34 | } 35 | } // namespace 36 | 37 | namespace nvinfer1 38 | { 39 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y) 40 | { 41 | mYoloWidth = yolo_width; 42 | mYoloHeight = yolo_height; 43 | mNumAnchors = num_anchors; 44 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float)); 45 | mNumClasses = num_classes; 46 | mInputWidth = input_width; 47 | mInputHeight = input_height; 48 | mScaleXY = scale_x_y; 49 | 50 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 51 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 52 | } 53 | 54 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 55 | { 56 | const char *d = reinterpret_cast(data), *a = d; 57 | read(d, mThreadCount); 58 | read(d, mYoloWidth); 59 | read(d, mYoloHeight); 60 | read(d, mNumAnchors); 61 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float)); 62 | d += MAX_ANCHORS * 2 * sizeof(float); 63 | read(d, mNumClasses); 64 | read(d, mInputWidth); 65 | read(d, mInputHeight); 66 | read(d, mScaleXY); 67 | 68 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 69 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 70 | 71 | assert(d == a + length); 72 | } 73 | 74 | void YoloLayerPlugin::serialize(void* buffer) const 75 | { 76 | char* d = static_cast(buffer), *a = d; 77 | write(d, mThreadCount); 78 | write(d, mYoloWidth); 79 | write(d, mYoloHeight); 80 | write(d, mNumAnchors); 81 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float)); 82 | d += MAX_ANCHORS * 2 * sizeof(float); 83 | write(d, mNumClasses); 84 | write(d, mInputWidth); 85 | write(d, mInputHeight); 86 | write(d, mScaleXY); 87 | 88 | assert(d == a + getSerializationSize()); 89 | } 90 | 91 | size_t YoloLayerPlugin::getSerializationSize() const 92 | { 93 | return sizeof(mThreadCount) + \ 94 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \ 95 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \ 96 | sizeof(mNumClasses) + \ 97 | sizeof(mInputWidth) + sizeof(mInputHeight) + \ 98 | sizeof(mScaleXY); 99 | } 100 | 101 | int YoloLayerPlugin::initialize() 102 | { 103 | return 0; 104 | } 105 | 106 | void YoloLayerPlugin::terminate() 
107 | { 108 | CHECK(cudaFree(mAnchors)); 109 | } 110 | 111 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 112 | { 113 | //output the result to channel 114 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float); 115 | return Dims3(totalsize, 1, 1); 116 | } 117 | 118 | void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) 119 | { 120 | mPluginNamespace = pluginNamespace; 121 | } 122 | 123 | const char* YoloLayerPlugin::getPluginNamespace() const 124 | { 125 | return mPluginNamespace; 126 | } 127 | 128 | // Return the DataType of the plugin output at the requested index 129 | DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const 130 | { 131 | return DataType::kFLOAT; 132 | } 133 | 134 | // Return true if output tensor is broadcast across a batch. 135 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const 136 | { 137 | return false; 138 | } 139 | 140 | // Return true if plugin can use input that is broadcast across batch without replication. 141 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const 142 | { 143 | return false; 144 | } 145 | 146 | void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 147 | { 148 | } 149 | 150 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 151 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 152 | { 153 | } 154 | 155 | // Detach the plugin object from its execution context. 156 | void YoloLayerPlugin::detachFromContext() 157 | { 158 | } 159 | 160 | const char* YoloLayerPlugin::getPluginType() const 161 | { 162 | return "YoloLayer_TRT"; 163 | } 164 | 165 | const char* YoloLayerPlugin::getPluginVersion() const 166 | { 167 | return "1"; 168 | } 169 | 170 | void YoloLayerPlugin::destroy() 171 | { 172 | delete this; 173 | } 174 | 175 | // Clone the plugin 176 | IPluginV2IOExt* YoloLayerPlugin::clone() const 177 | { 178 | YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 179 | p->setPluginNamespace(mPluginNamespace); 180 | return p; 181 | } 182 | 183 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); } 184 | 185 | inline __device__ float scale_sigmoidGPU(float x, float scale) 186 | { 187 | return scale * sigmoidGPU(x) - (scale - 1.0f) * 0.5f; 188 | } 189 | 190 | // CalDetection(): This kernel processes 1 yolo layer calculation. It 191 | // distributes calculations so that 1 GPU thread would be responsible 192 | // for each grid/anchor combination. 193 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0 194 | // (relative to orginal image width and height). 
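// In more detail, each GPU thread below handles one (grid cell, anchor) pair:
//   anchor_idx = idx / (yolo_width * yolo_height)
//   cell       = idx % (yolo_width * yolo_height); row = cell / yolo_width; col = cell % yolo_width
// and decodes the raw outputs as
//   x = (col + scale_sigmoid(tx)) / yolo_width      y = (row + scale_sigmoid(ty)) / yolo_height
//   w = exp(tw) * anchor_w / input_w                h = exp(th) * anchor_h / input_h
// before shifting (x, y) from the box center to the top-left corner. The
// objectness and best-class scores are sigmoids of the corresponding logits.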
195 | __global__ void CalDetection(const float *input, float *output, int yolo_width, int yolo_height, int num_anchors, 196 | const float *anchors, int num_classes, int input_w, int input_h, float scale_x_y) 197 | { 198 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 199 | Detection* det = ((Detection*) output) + idx; 200 | int total_grids = yolo_width * yolo_height; 201 | if (idx >= total_grids * num_anchors) return; 202 | 203 | int anchor_idx = idx / total_grids; 204 | idx = idx - total_grids * anchor_idx; 205 | int info_len = 5 + num_classes; 206 | const float* cur_input = input + anchor_idx * (info_len * total_grids); 207 | 208 | int class_id; 209 | float max_cls_logit = -CUDART_INF_F; // minus infinity 210 | for (int i = 5; i < info_len; ++i) { 211 | float l = cur_input[idx + i * total_grids]; 212 | if (l > max_cls_logit) { 213 | max_cls_logit = l; 214 | class_id = i - 5; 215 | } 216 | } 217 | float max_cls_prob = sigmoidGPU(max_cls_logit); 218 | float box_prob = sigmoidGPU(cur_input[idx + 4 * total_grids]); 219 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 220 | // return; 221 | 222 | int row = idx / yolo_width; 223 | int col = idx % yolo_width; 224 | 225 | det->bbox[0] = (col + scale_sigmoidGPU(cur_input[idx + 0 * total_grids], scale_x_y)) / yolo_width; // [0, 1] 226 | det->bbox[1] = (row + scale_sigmoidGPU(cur_input[idx + 1 * total_grids], scale_x_y)) / yolo_height; // [0, 1] 227 | det->bbox[2] = __expf(cur_input[idx + 2 * total_grids]) * anchors[2 * anchor_idx] / input_w; // [0, 1] 228 | det->bbox[3] = __expf(cur_input[idx + 3 * total_grids]) * anchors[2 * anchor_idx + 1] / input_h; // [0, 1] 229 | 230 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 231 | det->bbox[1] -= det->bbox[3] / 2; 232 | 233 | det->det_confidence = box_prob; 234 | det->class_id = class_id; 235 | det->class_confidence = max_cls_prob; 236 | } 237 | 238 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize) 239 | { 240 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight; 241 | 242 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection))); 243 | 244 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount>>> 245 | (inputs[0], output, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 246 | } 247 | 248 | int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) 249 | { 250 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize); 251 | return 0; 252 | } 253 | 254 | YoloPluginCreator::YoloPluginCreator() 255 | { 256 | mPluginAttributes.clear(); 257 | 258 | mFC.nbFields = mPluginAttributes.size(); 259 | mFC.fields = mPluginAttributes.data(); 260 | } 261 | 262 | const char* YoloPluginCreator::getPluginName() const 263 | { 264 | return "YoloLayer_TRT"; 265 | } 266 | 267 | const char* YoloPluginCreator::getPluginVersion() const 268 | { 269 | return "1"; 270 | } 271 | 272 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() 273 | { 274 | return &mFC; 275 | } 276 | 277 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 278 | { 279 | assert(!strcmp(name, getPluginName())); 280 | const PluginField* fields = fc->fields; 281 | int yolo_width, yolo_height, num_anchors = 0; 282 | float anchors[MAX_ANCHORS * 2]; 283 | int num_classes; 284 | int input_width, 
input_height; 285 | float scale_x_y = 1.0; 286 | 287 | for (int i = 0; i < fc->nbFields; ++i) 288 | { 289 | const char* attrName = fields[i].name; 290 | if (!strcmp(attrName, "yoloWidth")) 291 | { 292 | assert(fields[i].type == PluginFieldType::kINT32); 293 | yolo_width = *(static_cast(fields[i].data)); 294 | } 295 | else if (!strcmp(attrName, "yoloHeight")) 296 | { 297 | assert(fields[i].type == PluginFieldType::kINT32); 298 | yolo_height = *(static_cast(fields[i].data)); 299 | } 300 | else if (!strcmp(attrName, "numAnchors")) 301 | { 302 | assert(fields[i].type == PluginFieldType::kINT32); 303 | num_anchors = *(static_cast(fields[i].data)); 304 | } 305 | else if (!strcmp(attrName, "numClasses")) 306 | { 307 | assert(fields[i].type == PluginFieldType::kINT32); 308 | num_classes = *(static_cast(fields[i].data)); 309 | } 310 | else if (!strcmp(attrName, "inputWidth")) 311 | { 312 | assert(fields[i].type == PluginFieldType::kINT32); 313 | input_width = *(static_cast(fields[i].data)); 314 | } 315 | else if (!strcmp(attrName, "inputHeight")) 316 | { 317 | assert(fields[i].type == PluginFieldType::kINT32); 318 | input_height = *(static_cast(fields[i].data)); 319 | } 320 | else if (!strcmp(attrName, "anchors")){ 321 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS); 322 | assert(fields[i].type == PluginFieldType::kFLOAT32); 323 | memcpy(anchors, static_cast(fields[i].data), num_anchors * 2 * sizeof(float)); 324 | } 325 | else if (!strcmp(attrName, "scaleXY")) 326 | { 327 | assert(fields[i].type == PluginFieldType::kFLOAT32); 328 | scale_x_y = *(static_cast(fields[i].data)); 329 | } 330 | } 331 | assert(yolo_width > 0 && yolo_height > 0); 332 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f); 333 | assert(num_classes > 0); 334 | assert(input_width > 0 && input_height > 0); 335 | assert(scale_x_y >= 1.0); 336 | 337 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, input_width, input_height, scale_x_y); 338 | obj->setPluginNamespace(mNamespace.c_str()); 339 | return obj; 340 | } 341 | 342 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 343 | { 344 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 345 | obj->setPluginNamespace(mNamespace.c_str()); 346 | return obj; 347 | } 348 | 349 | PluginFieldCollection YoloPluginCreator::mFC{}; 350 | std::vector YoloPluginCreator::mPluginAttributes; 351 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 352 | } // namespace nvinfer1 353 | -------------------------------------------------------------------------------- /plugins/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "math_constants.h" 9 | #include "NvInfer.h" 10 | 11 | #define MAX_ANCHORS 6 12 | 13 | #define CHECK(status) \ 14 | do { \ 15 | auto ret = status; \ 16 | if (ret != 0) { \ 17 | std::cerr << "Cuda failure in file '" << __FILE__ \ 18 | << "' line " << __LINE__ \ 19 | << ": " << ret << std::endl; \ 20 | abort(); \ 21 | } \ 22 | } while (0) 23 | 24 | namespace Yolo 25 | { 26 | static constexpr float IGNORE_THRESH = 0.01f; 27 | 28 | struct alignas(float) Detection { 29 | float bbox[4]; // x, y, w, h 30 | float det_confidence; 31 | float class_id; 32 | float class_confidence; 33 | }; 34 | } 35 | 36 | namespace nvinfer1 37 | { 38 | class YoloLayerPlugin: public IPluginV2IOExt 39 | { 40 | 
public: 41 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y); 42 | YoloLayerPlugin(const void* data, size_t length); 43 | 44 | ~YoloLayerPlugin() override = default; 45 | 46 | int getNbOutputs() const override 47 | { 48 | return 1; 49 | } 50 | 51 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 52 | 53 | int initialize() override; 54 | 55 | void terminate() override; 56 | 57 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 58 | 59 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 60 | 61 | virtual size_t getSerializationSize() const override; 62 | 63 | virtual void serialize(void* buffer) const override; 64 | 65 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 66 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 67 | } 68 | 69 | const char* getPluginType() const override; 70 | 71 | const char* getPluginVersion() const override; 72 | 73 | void destroy() override; 74 | 75 | IPluginV2IOExt* clone() const override; 76 | 77 | void setPluginNamespace(const char* pluginNamespace) override; 78 | 79 | const char* getPluginNamespace() const override; 80 | 81 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; 82 | 83 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 84 | 85 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 86 | 87 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 88 | 89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override TRTNOEXCEPT; 90 | 91 | void detachFromContext() override; 92 | 93 | private: 94 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1); 95 | 96 | int mThreadCount = 64; 97 | int mYoloWidth, mYoloHeight, mNumAnchors; 98 | float mAnchorsHost[MAX_ANCHORS * 2]; 99 | float *mAnchors; // allocated on GPU 100 | int mNumClasses; 101 | int mInputWidth, mInputHeight; 102 | float mScaleXY; 103 | 104 | const char* mPluginNamespace; 105 | }; 106 | 107 | class YoloPluginCreator : public IPluginCreator 108 | { 109 | public: 110 | YoloPluginCreator(); 111 | 112 | ~YoloPluginCreator() override = default; 113 | 114 | const char* getPluginName() const override; 115 | 116 | const char* getPluginVersion() const override; 117 | 118 | const PluginFieldCollection* getFieldNames() override; 119 | 120 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 121 | 122 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 123 | 124 | void setPluginNamespace(const char* libNamespace) override 125 | { 126 | mNamespace = libNamespace; 127 | } 128 | 129 | const char* getPluginNamespace() const override 130 | { 131 | return mNamespace.c_str(); 132 | } 133 | 134 | private: 135 | static PluginFieldCollection mFC; 136 | static std::vector mPluginAttributes; 137 | std::string mNamespace; 138 | }; 139 | }; 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /plugins/yolo_layer.o: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/yolo_layer.o -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ## ! DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD 2 | 3 | from distutils.core import setup 4 | from catkin_pkg.python_setup import generate_distutils_setup 5 | 6 | # fetch values from package.xml 7 | setup_args = generate_distutils_setup( 8 | packages=["yolov4_trt"], package_dir={"": "src"} 9 | ) 10 | 11 | setup(**setup_args) 12 | -------------------------------------------------------------------------------- /src/image_converter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include "image_converter.h" 24 | 25 | #include 26 | #include 27 | 28 | 29 | 30 | static imageFormat imageFormatFromEncoding( const std::string& encoding ) 31 | { 32 | if( encoding == sensor_msgs::image_encodings::BGR8 ) 33 | return IMAGE_BGR8; 34 | else if( encoding == sensor_msgs::image_encodings::BGRA8 ) 35 | return IMAGE_BGRA8; 36 | else if( encoding == sensor_msgs::image_encodings::RGB8 ) 37 | return IMAGE_RGB8; 38 | else if( encoding == sensor_msgs::image_encodings::RGBA8 ) 39 | return IMAGE_RGBA8; 40 | else if( encoding == sensor_msgs::image_encodings::MONO8 ) 41 | return IMAGE_GRAY8; 42 | else if( encoding == sensor_msgs::image_encodings::YUV422 ) 43 | return IMAGE_UYVY; 44 | else if( encoding == sensor_msgs::image_encodings::BAYER_RGGB8 ) 45 | return IMAGE_BAYER_RGGB; 46 | else if( encoding == sensor_msgs::image_encodings::BAYER_BGGR8 ) 47 | return IMAGE_BAYER_BGGR; 48 | else if( encoding == sensor_msgs::image_encodings::BAYER_GBRG8 ) 49 | return IMAGE_BAYER_GBRG; 50 | else if( encoding == sensor_msgs::image_encodings::BAYER_GRBG8 ) 51 | return IMAGE_BAYER_GRBG; 52 | 53 | return IMAGE_UNKNOWN; 54 | } 55 | 56 | static std::string imageFormatToEncoding( imageFormat fmt ) 57 | { 58 | switch(fmt) 59 | { 60 | case IMAGE_BGR8: return sensor_msgs::image_encodings::BGR8; 61 | case IMAGE_BGRA8: return sensor_msgs::image_encodings::BGRA8; 62 | case IMAGE_RGB8: return sensor_msgs::image_encodings::RGB8; 63 | case IMAGE_RGBA8: return sensor_msgs::image_encodings::RGBA8; 64 | case IMAGE_GRAY8: return sensor_msgs::image_encodings::MONO8; 65 | case IMAGE_UYVY: return sensor_msgs::image_encodings::YUV422; 66 | case IMAGE_BAYER_RGGB: return sensor_msgs::image_encodings::BAYER_RGGB8; 67 | case IMAGE_BAYER_BGGR: return sensor_msgs::image_encodings::BAYER_BGGR8; 68 | case IMAGE_BAYER_GBRG: return sensor_msgs::image_encodings::BAYER_GBRG8; 69 | case IMAGE_BAYER_GRBG: return sensor_msgs::image_encodings::BAYER_GRBG8; 70 | } 71 | 72 | return "invalid"; 73 | } 74 | 75 | 76 | // constructor 77 | imageConverter::imageConverter() 78 | { 79 | mWidth = 0; 80 | mHeight = 0; 81 | mSizeInput = 0; 82 | mSizeOutput = 0; 83 | 84 | mInputCPU = NULL; 85 | mInputGPU = NULL; 86 | 87 | mOutputCPU = NULL; 88 | mOutputGPU = NULL; 89 | } 90 | 91 | 92 | // destructor 93 | imageConverter::~imageConverter() 94 | { 95 | Free(); 96 | } 97 | 98 | 99 | // Free 100 | void imageConverter::Free() 101 | { 102 | if( mInputCPU != NULL ) 103 | { 104 | CUDA(cudaFreeHost(mInputCPU)); 105 | 106 | mInputCPU = NULL; 107 | mInputGPU = NULL; 108 | } 109 | 110 | if( mOutputCPU != NULL ) 111 | { 112 | CUDA(cudaFreeHost(mOutputCPU)); 113 | 114 | mOutputCPU = NULL; 115 | mOutputGPU = NULL; 116 | } 117 | } 118 | 119 | // Convert 120 | bool imageConverter::Convert( const sensor_msgs::ImageConstPtr& input ) 121 | { 122 | ROS_DEBUG("converting %ux%u %s image", input->width, input->height, input->encoding.c_str()); 123 | 124 | // parse the input format 125 | const imageFormat input_format = imageFormatFromEncoding(input->encoding); 126 | 127 | if( input_format == IMAGE_UNKNOWN ) 128 | { 129 | ROS_ERROR("image encoding %s is not a compatible format to use with ros_deep_learning", input->encoding.c_str()); 130 | return false; 131 | } 132 | 133 | // assure memory allocation 134 | if( !Resize(input->width, input->height, input_format) ) 135 | return false; 136 | 137 | // copy input to shared memory 138 | memcpy(mInputCPU, input->data.data(), imageFormatSize(input_format, input->width, input->height)); 139 | 140 | // convert image format 
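// (mInputCPU and mInputGPU point at the same zero-copy buffer allocated with
// cudaAllocMapped() in Resize(), so the memcpy above already made the raw ROS
// image bytes visible to the GPU; cudaConvertColor() below then writes the
// RGB8 InternalFormat result into mOutputGPU for downstream CUDA processing.)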
141 | if( CUDA_FAILED(cudaConvertColor(mInputGPU, input_format, mOutputGPU, InternalFormat, input->width, input->height)) ) 142 | { 143 | ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(input_format), imageFormatToStr(InternalFormat)); 144 | return false; 145 | } 146 | 147 | return true; 148 | } 149 | 150 | 151 | // Convert 152 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format ) 153 | { 154 | return Convert(msg, format, ImageGPU()); 155 | } 156 | 157 | 158 | // Convert 159 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format, PixelType* imageGPU ) 160 | { 161 | if( !mInputCPU || !imageGPU || mWidth == 0 || mHeight == 0 || mSizeInput == 0 || mSizeOutput == 0 ) 162 | return false; 163 | 164 | // perform colorspace conversion into the desired encoding 165 | // in this direction, we reverse use of input/output pointers 166 | if( CUDA_FAILED(cudaConvertColor(imageGPU, InternalFormat, mInputGPU, format, mWidth, mHeight)) ) 167 | { 168 | ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(InternalFormat), imageFormatToStr(format)); 169 | return false; 170 | } 171 | 172 | // calculate size of the msg 173 | const size_t msg_size = imageFormatSize(format, mWidth, mHeight); 174 | 175 | // allocate msg storage 176 | msg.data.resize(msg_size); 177 | 178 | // copy the converted image into the msg 179 | memcpy(msg.data.data(), mInputCPU, msg_size); 180 | 181 | // populate metadata 182 | msg.width = mWidth; 183 | msg.height = mHeight; 184 | msg.step = (mWidth * imageFormatDepth(format)) / 8; 185 | 186 | msg.encoding = imageFormatToEncoding(format); 187 | msg.is_bigendian = false; 188 | 189 | return true; 190 | } 191 | 192 | 193 | // Resize 194 | bool imageConverter::Resize( uint32_t width, uint32_t height, imageFormat inputFormat ) 195 | { 196 | const size_t input_size = imageFormatSize(inputFormat, width, height); 197 | const size_t output_size = imageFormatSize(InternalFormat, width, height); 198 | 199 | if( input_size != mSizeInput || output_size != mSizeOutput || mWidth != width || mHeight != height ) 200 | { 201 | Free(); 202 | 203 | if( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputGPU, input_size) || 204 | !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputGPU, output_size) ) 205 | { 206 | ROS_ERROR("failed to allocate memory for %ux%u image conversion", width, height); 207 | return false; 208 | } 209 | 210 | ROS_INFO("allocated CUDA memory for %ux%u image conversion", width, height); 211 | 212 | mWidth = width; 213 | mHeight = height; 214 | mSizeInput = input_size; 215 | mSizeOutput = output_size; 216 | } 217 | 218 | return true; 219 | } 220 | 221 | 222 | -------------------------------------------------------------------------------- /src/image_converter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #ifndef __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_ 24 | #define __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_ 25 | 26 | #include 27 | #include 28 | 29 | #include "ros_compat.h" 30 | 31 | 32 | /** 33 | * GPU image conversion 34 | */ 35 | class imageConverter 36 | { 37 | public: 38 | /** 39 | * Output image pixel type 40 | */ 41 | typedef uchar3 PixelType; 42 | 43 | /** 44 | * Image format used for internal CUDA processing 45 | */ 46 | static const imageFormat InternalFormat = IMAGE_RGB8; 47 | 48 | /** 49 | * Image format used for outputting ROS image messages 50 | */ 51 | static const imageFormat ROSOutputFormat = IMAGE_BGR8; 52 | 53 | /** 54 | * Constructor 55 | */ 56 | imageConverter(); 57 | 58 | /** 59 | * Destructor 60 | */ 61 | ~imageConverter(); 62 | 63 | /** 64 | * Free the memory 65 | */ 66 | void Free(); 67 | 68 | /** 69 | * Convert to 32-bit RGBA floating point 70 | */ 71 | bool Convert( const sensor_msgs::ImageConstPtr& input ); 72 | 73 | /** 74 | * Convert to ROS sensor_msgs::Image message 75 | */ 76 | bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat ); 77 | 78 | /** 79 | * Convert to ROS sensor_msgs::Image message 80 | */ 81 | bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat, PixelType* imageGPU ); 82 | 83 | /** 84 | * Resize the memory (if necessary) 85 | */ 86 | bool Resize( uint32_t width, uint32_t height, imageFormat inputFormat ); 87 | 88 | /** 89 | * Retrieve the converted image width 90 | */ 91 | inline uint32_t GetWidth() const { return mWidth; } 92 | 93 | /** 94 | * Retrieve the converted image height 95 | */ 96 | inline uint32_t GetHeight() const { return mHeight; } 97 | 98 | /** 99 | * Retrieve the GPU pointer of the converted image 100 | */ 101 | inline PixelType* ImageGPU() const { return mOutputGPU; } 102 | 103 | private: 104 | 105 | uint32_t mWidth; 106 | uint32_t mHeight; 107 | size_t mSizeInput; 108 | size_t mSizeOutput; 109 | 110 | void* mInputCPU; 111 | void* mInputGPU; 112 | 113 | PixelType* mOutputCPU; 114 | PixelType* mOutputGPU; 115 | }; 116 | 117 | #endif 118 | 119 | -------------------------------------------------------------------------------- /src/node_video_source.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "ros_compat.h" 24 | #include "image_converter.h" 25 | 26 | #include 27 | 28 | 29 | 30 | // globals 31 | videoSource* stream = NULL; 32 | imageConverter* image_cvt = NULL; 33 | Publisher image_pub = NULL; 34 | 35 | 36 | // aquire and publish camera frame 37 | bool aquireFrame() 38 | { 39 | imageConverter::PixelType* nextFrame = NULL; 40 | 41 | // get the latest frame 42 | if( !stream->Capture(&nextFrame, 1000) ) 43 | { 44 | ROS_ERROR("failed to capture next frame"); 45 | return false; 46 | } 47 | 48 | // assure correct image size 49 | if( !image_cvt->Resize(stream->GetWidth(), stream->GetHeight(), imageConverter::ROSOutputFormat) ) 50 | { 51 | ROS_ERROR("failed to resize camera image converter"); 52 | return false; 53 | } 54 | 55 | // populate the message 56 | sensor_msgs::Image msg; 57 | 58 | if( !image_cvt->Convert(msg, imageConverter::ROSOutputFormat, nextFrame) ) 59 | { 60 | ROS_ERROR("failed to convert video stream frame to sensor_msgs::Image"); 61 | return false; 62 | } 63 | 64 | // populate timestamp in header field 65 | msg.header.stamp = ROS_TIME_NOW(); 66 | 67 | // publish the message 68 | image_pub->publish(msg); 69 | ROS_DEBUG("published %ux%u video frame", stream->GetWidth(), stream->GetHeight()); 70 | 71 | return true; 72 | } 73 | 74 | 75 | // node main loop 76 | int main(int argc, char **argv) 77 | { 78 | /* 79 | * create node instance 80 | */ 81 | ROS_CREATE_NODE("video_source"); 82 | 83 | /* 84 | * declare parameters 85 | */ 86 | videoOptions video_options; 87 | 88 | std::string resource_str; 89 | std::string codec_str; 90 | 91 | int video_width = video_options.width; 92 | int video_height = video_options.height; 93 | 94 | ROS_DECLARE_PARAMETER("resource", resource_str); 95 | ROS_DECLARE_PARAMETER("codec", codec_str); 96 | ROS_DECLARE_PARAMETER("width", video_width); 97 | ROS_DECLARE_PARAMETER("height", video_height); 98 | ROS_DECLARE_PARAMETER("framerate", video_options.frameRate); 99 | ROS_DECLARE_PARAMETER("loop", video_options.loop); 100 | 101 | /* 102 | * retrieve parameters 103 | */ 104 | ROS_GET_PARAMETER("resource", resource_str); 105 | ROS_GET_PARAMETER("codec", codec_str); 106 | ROS_GET_PARAMETER("width", video_width); 107 | ROS_GET_PARAMETER("height", video_height); 108 | ROS_GET_PARAMETER("framerate", video_options.frameRate); 109 | ROS_GET_PARAMETER("loop", video_options.loop); 110 | 111 | if( resource_str.size() == 
0 ) 112 | { 113 | ROS_ERROR("resource param wasn't set - please set the node's resource parameter to the input device/filename/URL"); 114 | return 0; 115 | } 116 | 117 | if( codec_str.size() != 0 ) 118 | video_options.codec = videoOptions::CodecFromStr(codec_str.c_str()); 119 | 120 | video_options.width = video_width; 121 | video_options.height = video_height; 122 | 123 | ROS_INFO("opening video source: %s", resource_str.c_str()); 124 | 125 | /* 126 | * open video source 127 | */ 128 | stream = videoSource::Create(resource_str.c_str(), video_options); 129 | 130 | if( !stream ) 131 | { 132 | ROS_ERROR("failed to open video source"); 133 | return 0; 134 | } 135 | 136 | 137 | /* 138 | * create image converter 139 | */ 140 | image_cvt = new imageConverter(); 141 | 142 | if( !image_cvt ) 143 | { 144 | ROS_ERROR("failed to create imageConverter"); 145 | return 0; 146 | } 147 | 148 | 149 | /* 150 | * advertise publisher topics 151 | */ 152 | ROS_CREATE_PUBLISHER(sensor_msgs::Image, "raw", 2, image_pub); 153 | 154 | 155 | /* 156 | * start the camera streaming 157 | */ 158 | if( !stream->Open() ) 159 | { 160 | ROS_ERROR("failed to start streaming video source"); 161 | return 0; 162 | } 163 | 164 | 165 | /* 166 | * start publishing video frames 167 | */ 168 | while( ROS_OK() ) 169 | { 170 | if( !aquireFrame() ) 171 | { 172 | if( !stream->IsStreaming() ) 173 | { 174 | ROS_INFO("stream is closed or reached EOS, exiting node..."); 175 | break; 176 | } 177 | } 178 | 179 | if( ROS_OK() ) 180 | ROS_SPIN_ONCE(); 181 | } 182 | 183 | 184 | /* 185 | * free resources 186 | */ 187 | delete stream; 188 | delete image_cvt; 189 | 190 | return 0; 191 | } 192 | 193 | -------------------------------------------------------------------------------- /src/ros_compat.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "ros_compat.h" 24 | 25 | #ifdef ROS2 26 | 27 | std::string __node_name_; 28 | rclcpp::Clock::SharedPtr __global_clock_; 29 | 30 | #endif 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/ros_compat.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #ifndef __ROS_DEEP_LEARNING_COMPATABILITY_H_ 24 | #define __ROS_DEEP_LEARNING_COMPATABILITY_H_ 25 | 26 | #ifdef ROS1 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #define ROS_CREATE_NODE(name) \ 36 | ros::init(argc, argv, name); \ 37 | ros::NodeHandle nh; \ 38 | ros::NodeHandle private_nh("~"); 39 | 40 | template 41 | using Publisher = ros::Publisher*; 42 | 43 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr) ros::Publisher __publisher_##ptr = private_nh.advertise(topic, queue); ptr = &__publisher_##ptr 44 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr) ros::Publisher __publisher_##ptr = private_nh.advertise(topic, queue, [&](const ros::SingleSubscriberPublisher& connect_msg){callback();}); ptr = &__publisher_##ptr 45 | 46 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback) private_nh.subscribe(topic, queue, callback) 47 | #define ROS_SUBSCRIBER_TOPIC(subscriber) subscriber.getTopic() 48 | 49 | #define ROS_NUM_SUBSCRIBERS(publisher) publisher->getNumSubscribers() 50 | #define ROS_GET_NAMESPACE() private_nh.getNamespace() 51 | #define ROS_GET_PARAMETER(name, val) private_nh.getParam(name, val) 52 | #define ROS_GET_PARAMETER_OR(name, val, alt) private_nh.param(name, val, alt) 53 | #define ROS_SET_PARAMETER(name, val) private_nh.setParam(name, val) 54 | 55 | template static void __ros_declare_parameter( ros::NodeHandle& nh, const std::string& name, const T& default_value ) 56 | { 57 | if( !nh.hasParam(name) ) 58 | nh.setParam(name, default_value); 59 | } 60 | 61 | #define ROS_DECLARE_PARAMETER(name, default_value) __ros_declare_parameter(private_nh, name, default_value) 62 | 63 | #define ROS_TIME_NOW() ros::Time::now() 64 | #define ROS_SPIN() ros::spin() 65 | #define ROS_SPIN_ONCE() ros::spinOnce() 66 | #define ROS_OK() ros::ok() 67 | #define ROS_SHUTDOWN() ros::shutdown() 68 | 69 | #elif ROS2 70 | 71 | #include 72 | #include 73 | #include 74 | #include 75 | #include 76 | #include 77 | 78 | namespace vision_msgs 79 | { 80 | typedef msg::Classification2D Classification2D; 81 | typedef msg::Detection2D Detection2D; 82 | typedef msg::Detection2DArray Detection2DArray; 83 | typedef msg::ObjectHypothesis ObjectHypothesis; 84 | typedef msg::ObjectHypothesisWithPose ObjectHypothesisWithPose; 85 | typedef msg::VisionInfo VisionInfo; 86 | } 87 | 88 | namespace sensor_msgs 89 | 
{ 90 | typedef msg::Image Image; 91 | typedef msg::Image::SharedPtr ImagePtr; 92 | typedef msg::Image::ConstSharedPtr ImageConstPtr; 93 | } 94 | 95 | namespace ros = rclcpp; 96 | 97 | extern std::string __node_name_; 98 | 99 | #define ROS_INFO(...) RCUTILS_LOG_INFO_NAMED(__node_name_.c_str(), __VA_ARGS__) 100 | #define ROS_DEBUG(...) RCUTILS_LOG_DEBUG_NAMED(__node_name_.c_str(), __VA_ARGS__) 101 | #define ROS_ERROR(...) RCUTILS_LOG_ERROR_NAMED(__node_name_.c_str(), __VA_ARGS__) 102 | 103 | #define ROS_CREATE_NODE(name) \ 104 | rclcpp::init(argc, argv); \ 105 | auto node = rclcpp::Node::make_shared(name, "/" name); \ 106 | __node_name_ = name; \ 107 | __global_clock_ = std::make_shared(RCL_ROS_TIME); 108 | 109 | template 110 | using Publisher = std::shared_ptr>; 111 | 112 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr) ptr = node->create_publisher(topic, queue) 113 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr) ptr = node->create_publisher(topic, queue); rclcpp::TimerBase::SharedPtr __timer_publisher_##ptr = node->create_wall_timer(std::chrono::milliseconds(500), \ 114 | [&](){ static int __subscribers_##ptr=0; const size_t __subscription_count=ptr->get_subscription_count(); if(__subscribers_##ptr != __subscription_count) { if(__subscription_count > __subscribers_##ptr) callback(); __subscribers_##ptr=__subscription_count; }}) 115 | 116 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback) node->create_subscription(topic, queue, callback) 117 | #define ROS_SUBSCRIBER_TOPIC(subscriber) subscriber->get_topic_name(); 118 | 119 | #define ROS_NUM_SUBSCRIBERS(publisher) publisher->get_subscription_count() 120 | #define ROS_GET_NAMESPACE() node->get_namespace() 121 | #define ROS_GET_PARAMETER(name, val) node->get_parameter(name, val) 122 | #define ROS_GET_PARAMETER_OR(name, val, alt) node->get_parameter_or(name, val, alt) // TODO set undefined params in param server 123 | #define ROS_SET_PARAMETER(name, val) node->set_parameter(rclcpp::Parameter(name, val)) 124 | 125 | #define ROS_DECLARE_PARAMETER(name, default_value) node->declare_parameter(name, default_value) 126 | 127 | extern rclcpp::Clock::SharedPtr __global_clock_; 128 | 129 | #define ROS_TIME_NOW() __global_clock_->now() 130 | #define ROS_SPIN() rclcpp::spin(node) 131 | #define ROS_SPIN_ONCE() rclcpp::spin_some(node) 132 | #define ROS_OK() rclcpp::ok() 133 | #define ROS_SHUTDOWN() rclcpp::shutdown() 134 | 135 | #endif 136 | #endif 137 | 138 | -------------------------------------------------------------------------------- /trt_yolo_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import time 5 | 6 | import cv2 7 | import pycuda.autoinit # For initializing CUDA driver 8 | import pycuda.driver as cuda 9 | 10 | from utils.yolo_classes import get_cls_dict 11 | from utils.display import open_window, set_display, show_fps 12 | from utils.visualization import BBoxVisualization 13 | from utils.yolo_with_plugins import TrtYOLO 14 | 15 | import rospy 16 | import rospkg 17 | from yolov4_trt_ros.msg import Detector2DArray 18 | from yolov4_trt_ros.msg import Detector2D 19 | from vision_msgs.msg import BoundingBox2D 20 | from sensor_msgs.msg import Image 21 | from cv_bridge import CvBridge, CvBridgeError 22 | 23 | 24 | class yolov4(object): 25 | def __init__(self): 26 | """ Constructor """ 27 | 28 | self.bridge = CvBridge() 29 | self.init_params() 30 | self.init_yolo() 31 | self.cuda_ctx = cuda.Device(0).make_context() 32 | 
self.trt_yolo = TrtYOLO( 33 | (self.model_path + self.model), (self.h, self.w), self.category_num) 34 | 35 | def __del__(self): 36 | """ Destructor """ 37 | 38 | self.cuda_ctx.pop() 39 | del self.trt_yolo 40 | del self.cuda_ctx 41 | 42 | def clean_up(self): 43 | """ Backup destructor: Release cuda memory """ 44 | 45 | if self.trt_yolo is not None: 46 | self.cuda_ctx.pop() 47 | del self.trt_yolo 48 | del self.cuda_ctx 49 | 50 | def init_params(self): 51 | """ Initializes ros parameters """ 52 | 53 | rospack = rospkg.RosPack() 54 | package_path = rospack.get_path("yolov4_trt_ros") 55 | self.video_topic = rospy.get_param("/video_topic", "/video_source/raw") 56 | self.model = rospy.get_param("/model", "yolov3") 57 | self.model_path = rospy.get_param( 58 | "/model_path", package_path + "/yolo/") 59 | self.input_shape = rospy.get_param("/input_shape", "416") 60 | self.category_num = rospy.get_param("/category_number", 80) 61 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5) 62 | self.show_img = rospy.get_param("/show_image", True) 63 | self.image_sub = rospy.Subscriber( 64 | self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3) 65 | self.detection_pub = rospy.Publisher( 66 | "detections", Detector2DArray, queue_size=1) 67 | self.overlay_pub = rospy.Publisher( 68 | "/result/overlay", Image, queue_size=1) 69 | 70 | def init_yolo(self): 71 | """ Initialises yolo parameters required for trt engine """ 72 | 73 | if self.model.find('-') == -1: 74 | self.model = self.model + "-" + self.input_shape 75 | 76 | yolo_dim = self.model.split('-')[-1] 77 | 78 | if 'x' in yolo_dim: 79 | dim_split = yolo_dim.split('x') 80 | if len(dim_split) != 2: 81 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) 82 | self.w, self.h = int(dim_split[0]), int(dim_split[1]) 83 | else: 84 | self.h = self.w = int(yolo_dim) 85 | if self.h % 32 != 0 or self.w % 32 != 0: 86 | raise SystemExit('ERROR: bad yolo_dim (%s)!' 
% yolo_dim) 87 | 88 | cls_dict = get_cls_dict(self.category_num) 89 | self.vis = BBoxVisualization(cls_dict) 90 | 91 | 92 | def img_callback(self, ros_img): 93 | """Continuously capture images from camera and do object detection """ 94 | 95 | tic = time.time() 96 | 97 | # converts from ros_img to cv_img for processing 98 | try: 99 | cv_img = self.bridge.imgmsg_to_cv2( 100 | ros_img, desired_encoding="bgr8") 101 | rospy.logdebug("ROS Image converted for processing") 102 | except CvBridgeError as e: 103 | rospy.loginfo("Failed to convert image %s", str(e)) 104 | 105 | if cv_img is not None: 106 | boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th) 107 | 108 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 109 | toc = time.time() 110 | fps = 1.0 / (toc - tic) 111 | 112 | self.publisher(boxes, confs, clss) 113 | 114 | if self.show_img: 115 | cv_img = show_fps(cv_img, fps) 116 | cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img) 117 | cv2.waitKey(1) 118 | 119 | # converts back to ros_img type for publishing 120 | try: 121 | overlay_img = self.bridge.cv2_to_imgmsg( 122 | cv_img, encoding="passthrough") 123 | rospy.logdebug("CV Image converted for publishing") 124 | self.overlay_pub.publish(overlay_img) 125 | except CvBridgeError as e: 126 | rospy.loginfo("Failed to convert image %s", str(e)) 127 | 128 | def publisher(self, boxes, confs, clss): 129 | """ Publishes to detector_msgs 130 | 131 | Parameters: 132 | boxes (List(List(int))) : Bounding boxes of all objects 133 | confs (List(double)) : Probability scores of all objects 134 | clss (List(int)) : Class ID of all classes 135 | """ 136 | detection2d = Detector2DArray() 137 | detection = Detector2D() 138 | detection2d.header.stamp = rospy.Time.now() 139 | 140 | for i in range(len(boxes)): 141 | # boxes : xmin, ymin, xmax, ymax 142 | for _ in boxes: 143 | detection.header.stamp = rospy.Time.now() 144 | detection.header.frame_id = "camera" # change accordingly 145 | detection.results.id = clss[i] 146 | detection.results.score = confs[i] 147 | 148 | detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2 149 | detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2 150 | detection.bbox.center.theta = 0.0 # change if required 151 | 152 | detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2]) 153 | detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3]) 154 | 155 | detection2d.detections.append(detection) 156 | 157 | self.detection_pub.publish(detection2d) 158 | 159 | 160 | def main(): 161 | yolo = yolov4() 162 | rospy.init_node('yolov4_detection', anonymous=True) 163 | try: 164 | rospy.spin() 165 | except KeyboardInterrupt: 166 | rospy.on_shutdown(yolo.clean_up()) 167 | print("Shutting down") 168 | 169 | 170 | if __name__ == '__main__': 171 | main() 172 | -------------------------------------------------------------------------------- /trt_yolo_v4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import time 5 | 6 | import cv2 7 | import pycuda.autoinit # For initializing CUDA driver 8 | import pycuda.driver as cuda 9 | 10 | from utils.yolo_classes import get_cls_dict 11 | from utils.display import open_window, set_display, show_fps 12 | from utils.visualization import BBoxVisualization 13 | from utils.yolo_with_plugins import TrtYOLO 14 | 15 | import rospy 16 | import rospkg 17 | from yolov4_trt_ros.msg import Detector2DArray 18 | from yolov4_trt_ros.msg import Detector2D 19 | from vision_msgs.msg import BoundingBox2D 20 
| from sensor_msgs.msg import Image 21 | from cv_bridge import CvBridge, CvBridgeError 22 | 23 | 24 | class yolov4(object): 25 | def __init__(self): 26 | """ Constructor """ 27 | 28 | self.bridge = CvBridge() 29 | self.init_params() 30 | self.init_yolo() 31 | self.cuda_ctx = cuda.Device(0).make_context() 32 | self.trt_yolo = TrtYOLO( 33 | (self.model_path + self.model), (self.h, self.w), self.category_num) 34 | 35 | def __del__(self): 36 | """ Destructor """ 37 | 38 | self.cuda_ctx.pop() 39 | del self.trt_yolo 40 | del self.cuda_ctx 41 | 42 | def clean_up(self): 43 | """ Backup destructor: Release cuda memory """ 44 | 45 | if self.trt_yolo is not None: 46 | self.cuda_ctx.pop() 47 | del self.trt_yolo 48 | del self.cuda_ctx 49 | 50 | def init_params(self): 51 | """ Initializes ros parameters """ 52 | 53 | rospack = rospkg.RosPack() 54 | package_path = rospack.get_path("yolov4_trt_ros") 55 | self.video_topic = rospy.get_param("/video_topic", "/video_source/raw") 56 | self.model = rospy.get_param("/model", "yolov4Custom") 57 | self.model_path = rospy.get_param( 58 | "/model_path", package_path + "/yolo/") 59 | self.category_num = rospy.get_param("/category_number", 10) 60 | self.input_shape = rospy.get_param("/input_shape", "416") 61 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5) 62 | self.show_img = rospy.get_param("/show_image", True) 63 | self.image_sub = rospy.Subscriber( 64 | self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3) 65 | self.detection_pub = rospy.Publisher( 66 | "detections", Detector2DArray, queue_size=1) 67 | self.overlay_pub = rospy.Publisher( 68 | "/result/overlay", Image, queue_size=1) 69 | 70 | def init_yolo(self): 71 | """ Initialises yolo parameters required for trt engine """ 72 | 73 | if self.model.find('-') == -1: 74 | self.model = self.model + "-" + self.input_shape 75 | 76 | yolo_dim = self.model.split('-')[-1] 77 | 78 | if 'x' in yolo_dim: 79 | dim_split = yolo_dim.split('x') 80 | if len(dim_split) != 2: 81 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) 82 | self.w, self.h = int(dim_split[0]), int(dim_split[1]) 83 | else: 84 | self.h = self.w = int(yolo_dim) 85 | if self.h % 32 != 0 or self.w % 32 != 0: 86 | raise SystemExit('ERROR: bad yolo_dim (%s)!' 
% yolo_dim) 87 | 88 | cls_dict = get_cls_dict(self.category_num) 89 | self.vis = BBoxVisualization(cls_dict) 90 | 91 | 92 | def img_callback(self, ros_img): 93 | """Continuously capture images from camera and do object detection """ 94 | 95 | tic = time.time() 96 | 97 | # converts from ros_img to cv_img for processing 98 | try: 99 | cv_img = self.bridge.imgmsg_to_cv2( 100 | ros_img, desired_encoding="bgr8") 101 | rospy.logdebug("ROS Image converted for processing") 102 | except CvBridgeError as e: 103 | rospy.loginfo("Failed to convert image %s", str(e)) 104 | 105 | if cv_img is not None: 106 | boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th) 107 | 108 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 109 | toc = time.time() 110 | fps = 1.0 / (toc - tic) 111 | 112 | self.publisher(boxes, confs, clss) 113 | 114 | if self.show_img: 115 | cv_img = show_fps(cv_img, fps) 116 | cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img) 117 | cv2.waitKey(1) 118 | 119 | # converts back to ros_img type for publishing 120 | try: 121 | overlay_img = self.bridge.cv2_to_imgmsg( 122 | cv_img, encoding="passthrough") 123 | rospy.logdebug("CV Image converted for publishing") 124 | self.overlay_pub.publish(overlay_img) 125 | except CvBridgeError as e: 126 | rospy.loginfo("Failed to convert image %s", str(e)) 127 | 128 | def publisher(self, boxes, confs, clss): 129 | """ Publishes to detector_msgs 130 | 131 | Parameters: 132 | boxes (List(List(int))) : Bounding boxes of all objects 133 | confs (List(double)) : Probability scores of all objects 134 | clss (List(int)) : Class ID of all classes 135 | """ 136 | detection2d = Detector2DArray() 137 | detection = Detector2D() 138 | detection2d.header.stamp = rospy.Time.now() 139 | detection2d.header.frame_id = "camera" # change accordingly 140 | 141 | for i in range(len(boxes)): 142 | # boxes : xmin, ymin, xmax, ymax 143 | for _ in boxes: 144 | detection.header.stamp = rospy.Time.now() 145 | detection.header.frame_id = "camera" # change accordingly 146 | detection.results.id = clss[i] 147 | detection.results.score = confs[i] 148 | 149 | detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2 150 | detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2 151 | detection.bbox.center.theta = 0.0 # change if required 152 | 153 | detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2]) 154 | detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3]) 155 | 156 | detection2d.detections.append(detection) 157 | 158 | self.detection_pub.publish(detection2d) 159 | 160 | 161 | def main(): 162 | yolo = yolov4() 163 | rospy.init_node('yolov4_trt_ros', anonymous=True) 164 | try: 165 | rospy.spin() 166 | except KeyboardInterrupt: 167 | rospy.on_shutdown(yolo.clean_up()) 168 | print("Shutting down") 169 | 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /trt_yolo_v4_batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import time 5 | 6 | import cv2 7 | import pycuda.autoinit # For initializing CUDA driver 8 | import pycuda.driver as cuda 9 | 10 | from utils.yolo_classes import get_cls_dict 11 | from utils.display import open_window, set_display, show_fps 12 | from utils.visualization import BBoxVisualization 13 | from utils.yolo_with_plugins_batch import TrtYOLO 14 | 15 | import rospy 16 | import rospkg 17 | from yolov4_trt_ros.msg import Detector2DArray 18 | from 
yolov4_trt_ros.msg import Detector2D 19 | from vision_msgs.msg import BoundingBox2D 20 | from sensor_msgs.msg import Image 21 | from cv_bridge import CvBridge, CvBridgeError 22 | 23 | 24 | class yolov4(object): 25 | def __init__(self): 26 | """ Constructor """ 27 | 28 | self.bridge = CvBridge() 29 | self.init_params() 30 | self.init_yolo() 31 | self.cuda_ctx = cuda.Device(0).make_context() 32 | self.trt_yolo = TrtYOLO( 33 | (self.model_path + self.model), (self.h, self.w), self.category_num) 34 | 35 | def __del__(self): 36 | """ Destructor """ 37 | 38 | self.cuda_ctx.pop() 39 | del self.trt_yolo 40 | del self.cuda_ctx 41 | 42 | def clean_up(self): 43 | """ Backup destructor: Release cuda memory """ 44 | 45 | if self.trt_yolo is not None: 46 | self.cuda_ctx.pop() 47 | del self.trt_yolo 48 | del self.cuda_ctx 49 | 50 | def init_params(self): 51 | """ Initializes ros parameters """ 52 | 53 | rospack = rospkg.RosPack() 54 | package_path = rospack.get_path("yolov4_trt_ros") 55 | #self.video_topic1 = rospy.get_param( 56 | # "/video_topic1", "/zed/zed_node/left_raw/image_raw_color") 57 | self.video_topic1 = rospy.get_param( 58 | "/video_topic1", "video_source/raw") 59 | self.video_topic2 = rospy.get_param( 60 | "/video_topic2", "/zed/zed_node/left_raw/image_raw_color") 61 | self.video_topic3 = rospy.get_param( 62 | "/video_topic3", "/zed/zed_node/right_raw/image_raw_color") 63 | self.video_topic4 = rospy.get_param( 64 | "/video_topic4", "/zed/zed_node/right_raw/image_raw_color") 65 | self.model = rospy.get_param("/model", "yolov4Custom") 66 | self.model_path = rospy.get_param( 67 | "/model_path", package_path + "/yolo/") 68 | self.category_num = rospy.get_param("/category_number", 10) 69 | self.input_shape = rospy.get_param("/input_shape", "416") 70 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5) 71 | self.show_img = rospy.get_param("/show_image", True) 72 | self.image_sub1 = rospy.Subscriber( 73 | self.video_topic1, Image, self.img_callback1, queue_size=1, buff_size=2**26) 74 | self.image_sub2 = rospy.Subscriber( 75 | self.video_topic2, Image, self.img_callback2, queue_size=1, buff_size=2**26) 76 | self.image_sub3 = rospy.Subscriber( 77 | self.video_topic3, Image, self.img_callback3, queue_size=1, buff_size=2**26) 78 | self.image_sub4 = rospy.Subscriber( 79 | self.video_topic4, Image, self.img_callback4, queue_size=1, buff_size=2**26) 80 | self.detection_pub = rospy.Publisher( 81 | "detections", Detector2DArray, queue_size=1) 82 | self.overlay_pub = rospy.Publisher( 83 | "/result/overlay", Image, queue_size=1) 84 | 85 | def init_yolo(self): 86 | """ Initialises yolo parameters required for trt engine """ 87 | 88 | if self.model.find('-') == -1: 89 | self.model = self.model + "-" + self.input_shape 90 | 91 | yolo_dim = self.model.split('-')[-1] 92 | 93 | if 'x' in yolo_dim: 94 | dim_split = yolo_dim.split('x') 95 | if len(dim_split) != 2: 96 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) 97 | self.w, self.h = int(dim_split[0]), int(dim_split[1]) 98 | else: 99 | self.h = self.w = int(yolo_dim) 100 | if self.h % 32 != 0 or self.w % 32 != 0: 101 | raise SystemExit('ERROR: bad yolo_dim (%s)!' 
% yolo_dim) 102 | 103 | cls_dict = get_cls_dict(self.category_num) 104 | self.vis = BBoxVisualization(cls_dict) 105 | 106 | def img_callback1(self, ros_img): 107 | """Continuously capture images from camera and do object detection """ 108 | 109 | # converts from ros_img to cv_img for processing 110 | try: 111 | cv_img = self.bridge.imgmsg_to_cv2( 112 | ros_img, desired_encoding="bgr8") 113 | rospy.logdebug("ROS Image converted for processing") 114 | except CvBridgeError as e: 115 | rospy.loginfo("Failed to convert image %s", str(e)) 116 | 117 | tic = time.time() 118 | boxes, confs, clss = self.trt_yolo.detect1(cv_img, self.conf_th) 119 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 120 | toc = time.time() 121 | fps = 1.0 / (toc - tic) 122 | print("Cam1: " + str(fps) + "\n") 123 | self.publisher(boxes, confs, clss, "Cam1") 124 | 125 | if self.show_img: 126 | cv_img = show_fps(cv_img, fps) 127 | cv2.imshow("Cam 1", cv_img) 128 | cv2.waitKey(1) 129 | 130 | # converts back to ros_img type for publishing 131 | try: 132 | overlay_img = self.bridge.cv2_to_imgmsg( 133 | cv_img, encoding="passthrough") 134 | rospy.logdebug("CV Image converted for publishing") 135 | self.overlay_pub.publish(overlay_img) 136 | except CvBridgeError as e: 137 | rospy.loginfo("Failed to convert image %s", str(e)) 138 | 139 | def img_callback2(self, ros_img): 140 | """Continuously capture images from camera and do object detection """ 141 | 142 | # converts from ros_img to cv_img for processing 143 | # time both cv_bridge fns 144 | try: 145 | cv_img = self.bridge.imgmsg_to_cv2( 146 | ros_img, desired_encoding="bgr8") 147 | rospy.logdebug("ROS Image converted for processing") 148 | except CvBridgeError as e: 149 | rospy.loginfo("Failed to convert image %s", str(e)) 150 | 151 | tic = time.time() 152 | boxes, confs, clss = self.trt_yolo.detect2(cv_img, self.conf_th) 153 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 154 | toc = time.time() 155 | fps = 1.0 / (toc - tic) 156 | print("Cam2: " + str(fps) + "\n") 157 | self.publisher(boxes, confs, clss, "Cam2") 158 | 159 | # time 1 160 | """ 161 | if self.show_img: 162 | cv_img = show_fps(cv_img, fps) 163 | cv2.imshow("Cam 2", cv_img) 164 | # time 2 165 | cv2.waitKey(1) 166 | """ 167 | 168 | # converts back to ros_img type for publishing 169 | try: 170 | overlay_img = self.bridge.cv2_to_imgmsg( 171 | cv_img, encoding="passthrough") 172 | rospy.logdebug("CV Image converted for publishing") 173 | self.overlay_pub.publish(overlay_img) 174 | except CvBridgeError as e: 175 | rospy.loginfo("Failed to convert image %s", str(e)) 176 | 177 | def img_callback3(self, ros_img): 178 | """Continuously capture images from camera and do object detection """ 179 | 180 | # converts from ros_img to cv_img for processing 181 | try: 182 | cv_img = self.bridge.imgmsg_to_cv2( 183 | ros_img, desired_encoding="bgr8") 184 | rospy.logdebug("ROS Image converted for processing") 185 | except CvBridgeError as e: 186 | rospy.loginfo("Failed to convert image %s", str(e)) 187 | 188 | tic = time.time() 189 | boxes, confs, clss = self.trt_yolo.detect3(cv_img, self.conf_th) 190 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 191 | toc = time.time() 192 | fps = 1.0 / (toc - tic) 193 | print("Cam3: " + str(fps) + "\n") 194 | self.publisher(boxes, confs, clss, "Cam3") 195 | """ 196 | if self.show_img: 197 | cv_img = show_fps(cv_img, fps) 198 | cv2.imshow("Cam 3", cv_img) 199 | cv2.waitKey(1) 200 | """ 201 | # converts back to ros_img type for publishing 202 | try: 203 | 
overlay_img = self.bridge.cv2_to_imgmsg( 204 | cv_img, encoding="passthrough") 205 | rospy.logdebug("CV Image converted for publishing") 206 | self.overlay_pub.publish(overlay_img) 207 | except CvBridgeError as e: 208 | rospy.loginfo("Failed to convert image %s", str(e)) 209 | 210 | def img_callback4(self, ros_img): 211 | """Continuously capture images from camera and do object detection """ 212 | 213 | # converts from ros_img to cv_img for processing 214 | try: 215 | cv_img = self.bridge.imgmsg_to_cv2( 216 | ros_img, desired_encoding="bgr8") 217 | rospy.logdebug("ROS Image converted for processing") 218 | except CvBridgeError as e: 219 | rospy.loginfo("Failed to convert image %s", str(e)) 220 | 221 | tic = time.time() 222 | boxes, confs, clss = self.trt_yolo.detect4(cv_img, self.conf_th) 223 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) 224 | toc = time.time() 225 | fps = 1.0 / (toc - tic) 226 | print("Cam4: " + str(fps) + "\n") 227 | self.publisher(boxes, confs, clss, "Cam4") 228 | """ 229 | if self.show_img: 230 | cv_img = show_fps(cv_img, fps) 231 | cv2.imshow("Cam 3", cv_img) 232 | cv2.waitKey(1) 233 | """ 234 | # converts back to ros_img type for publishing 235 | try: 236 | overlay_img = self.bridge.cv2_to_imgmsg( 237 | cv_img, encoding="passthrough") 238 | rospy.logdebug("CV Image converted for publishing") 239 | self.overlay_pub.publish(overlay_img) 240 | except CvBridgeError as e: 241 | rospy.loginfo("Failed to convert image %s", str(e)) 242 | 243 | def publisher(self, boxes, confs, clss, frame): 244 | """ Publishes to detector_msgs 245 | 246 | Parameters: 247 | boxes (List(List(int))) : Bounding boxes of all objects 248 | confs (List(double)) : Probability scores of all objects 249 | clss (List(int)) : Class ID of all classes 250 | """ 251 | detection2d = Detector2DArray() 252 | detection = Detector2D() 253 | detection2d.header.stamp = rospy.Time.now() 254 | detection2d.header.frame_id = "camera" # change accordingly 255 | 256 | for i in range(len(boxes)): 257 | # boxes : xmin, ymin, xmax, ymax 258 | for _ in boxes: 259 | detection.header.stamp = rospy.Time.now() 260 | detection.header.frame_id = frame # change accordingly 261 | detection.results.id = clss[i] 262 | detection.results.score = confs[i] 263 | 264 | detection.bbox.center.x = boxes[i][0] + \ 265 | (boxes[i][2] - boxes[i][0])/2 266 | detection.bbox.center.y = boxes[i][1] + \ 267 | (boxes[i][3] - boxes[i][1])/2 268 | detection.bbox.center.theta = 0.0 # change if required 269 | 270 | detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2]) 271 | detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3]) 272 | 273 | detection2d.detections.append(detection) 274 | 275 | self.detection_pub.publish(detection2d) 276 | 277 | 278 | def main(): 279 | yolo = yolov4() 280 | rospy.init_node('yolov4_trt_ros', anonymous=True) 281 | try: 282 | rospy.spin() 283 | except KeyboardInterrupt: 284 | del yolo 285 | rospy.on_shutdown(yolo.clean_up()) 286 | print("Shutting down") 287 | 288 | 289 | if __name__ == '__main__': 290 | main() 291 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/utils/__init__.py -------------------------------------------------------------------------------- /utils/display.py: -------------------------------------------------------------------------------- 1 | 2 | """display.py 3 
| """ 4 | 5 | 6 | import cv2 7 | 8 | 9 | def open_window(window_name, title, width=None, height=None): 10 | """Open the display window.""" 11 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 12 | cv2.setWindowTitle(window_name, title) 13 | if width and height: 14 | cv2.resizeWindow(window_name, width, height) 15 | 16 | 17 | def show_help_text(img, help_text): 18 | """Draw help text on image.""" 19 | cv2.putText(img, help_text, (11, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 20 | (32, 32, 32), 4, cv2.LINE_AA) 21 | cv2.putText(img, help_text, (10, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 22 | (240, 240, 240), 1, cv2.LINE_AA) 23 | return img 24 | 25 | 26 | def show_fps(img, fps): 27 | """Draw fps number at top-left corner of the image.""" 28 | font = cv2.FONT_HERSHEY_PLAIN 29 | line = cv2.LINE_AA 30 | fps_text = 'FPS: {:.2f}'.format(fps) 31 | cv2.putText(img, fps_text, (11, 20), font, 1.0, (32, 32, 32), 4, line) 32 | cv2.putText(img, fps_text, (10, 20), font, 1.0, (240, 240, 240), 1, line) 33 | return img 34 | 35 | 36 | def set_display(window_name, full_scrn): 37 | """Set disply window to either full screen or normal.""" 38 | if full_scrn: 39 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 40 | cv2.WINDOW_FULLSCREEN) 41 | else: 42 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 43 | cv2.WINDOW_NORMAL) 44 | -------------------------------------------------------------------------------- /utils/visualization.py: -------------------------------------------------------------------------------- 1 | """visualization.py 2 | 3 | The BBoxVisualization class implements drawing of nice looking 4 | bounding boxes based on object detection results. 5 | """ 6 | 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | 12 | # Constants 13 | ALPHA = 0.5 14 | FONT = cv2.FONT_HERSHEY_PLAIN 15 | TEXT_SCALE = 1.0 16 | TEXT_THICKNESS = 1 17 | BLACK = (0, 0, 0) 18 | WHITE = (255, 255, 255) 19 | 20 | 21 | def gen_colors(num_colors): 22 | """Generate different colors. 23 | 24 | # Arguments 25 | num_colors: total number of colors/classes. 26 | 27 | # Output 28 | bgrs: a list of (B, G, R) tuples which correspond to each of 29 | the colors/classes. 30 | """ 31 | import random 32 | import colorsys 33 | 34 | hsvs = [[float(x) / num_colors, 1., 0.7] for x in range(num_colors)] 35 | random.seed(1234) 36 | random.shuffle(hsvs) 37 | rgbs = list(map(lambda x: list(colorsys.hsv_to_rgb(*x)), hsvs)) 38 | bgrs = [(int(rgb[2] * 255), int(rgb[1] * 255), int(rgb[0] * 255)) 39 | for rgb in rgbs] 40 | return bgrs 41 | 42 | 43 | def draw_boxed_text(img, text, topleft, color): 44 | """Draw a transluent boxed text in white, overlayed on top of a 45 | colored patch surrounded by a black border. FONT, TEXT_SCALE, 46 | TEXT_THICKNESS and ALPHA values are constants (fixed) as defined 47 | on top. 48 | 49 | # Arguments 50 | img: the input image as a numpy array. 51 | text: the text to be drawn. 52 | topleft: XY coordinate of the topleft corner of the boxed text. 53 | color: color of the patch, i.e. background of the text. 54 | 55 | # Output 56 | img: note the original image is modified inplace. 57 | """ 58 | assert img.dtype == np.uint8 59 | img_h, img_w, _ = img.shape 60 | if topleft[0] >= img_w or topleft[1] >= img_h: 61 | return img 62 | margin = 3 63 | size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS) 64 | w = size[0][0] + margin * 2 65 | h = size[0][1] + margin * 2 66 | # the patch is used to draw boxed text 67 | patch = np.zeros((h, w, 3), dtype=np.uint8) 68 | patch[...] 
= color 69 | cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE, 70 | WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8) 71 | cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1) 72 | w = min(w, img_w - topleft[0]) # clip overlay at image boundary 73 | h = min(h, img_h - topleft[1]) 74 | # Overlay the boxed text onto region of interest (roi) in img 75 | roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :] 76 | cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi) 77 | return img 78 | 79 | 80 | class BBoxVisualization(): 81 | """BBoxVisualization class implements nice drawing of boudning boxes. 82 | 83 | # Arguments 84 | cls_dict: a dictionary used to translate class id to its name. 85 | """ 86 | 87 | def __init__(self, cls_dict): 88 | self.cls_dict = cls_dict 89 | self.colors = gen_colors(len(cls_dict)) 90 | 91 | def draw_bboxes(self, img, boxes, confs, clss): 92 | """Draw detected bounding boxes on the original image.""" 93 | for bb, cf, cl in zip(boxes, confs, clss): 94 | cl = int(cl) 95 | x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3] 96 | color = self.colors[cl] 97 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2) 98 | txt_loc = (max(x_min+2, 0), max(y_min+2, 0)) 99 | cls_name = self.cls_dict.get(cl, 'CLS{}'.format(cl)) 100 | txt = '{} {:.2f}'.format(cls_name, cf) 101 | img = draw_boxed_text(img, txt, txt_loc, color) 102 | return img 103 | -------------------------------------------------------------------------------- /utils/yolo_classes.py: -------------------------------------------------------------------------------- 1 | CLASSES_LIST = [ 2 | 'bike', 3 | 'bus', 4 | 'car', 5 | 'motor', 6 | 'person', 7 | 'rider', 8 | 'traffic light', 9 | 'traffic sign', 10 | 'train', 11 | 'truck', 12 | ] 13 | 14 | def get_cls_dict(category_num): 15 | """Get the class ID to name translation dictionary.""" 16 | if category_num == len(CLASSES_LIST): 17 | return {i: n for i, n in enumerate(CLASSES_LIST)} 18 | else: 19 | return {i: 'CLS%d' % i for i in range(category_num)} 20 | -------------------------------------------------------------------------------- /utils/yolo_with_plugins.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import ctypes 4 | 5 | import rospkg 6 | import numpy as np 7 | import cv2 8 | import tensorrt as trt 9 | import pycuda.driver as cuda 10 | import pycuda.autoinit 11 | 12 | rospack = rospkg.RosPack() 13 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so" 14 | 15 | try: 16 | ctypes.cdll.LoadLibrary(plugin_path) 17 | except OSError as e: 18 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. ' 19 | 'Did you forget to do a "make" in the "./plugins/" ' 20 | 'subdirectory?') 21 | 22 | 23 | def _preprocess_yolo(img, input_shape): 24 | """Preprocess an image before TRT YOLO inferencing. 
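    Steps, as implemented below: resize to the engine's input (W, H), convert
    BGR to RGB, reorder HWC -> CHW and scale pixel values to [0, 1].

    A minimal usage sketch, mirroring how TrtYOLO.detect() calls this helper
    (cv_img is a BGR frame; the 416x416 shape is only an illustrative assumption):

        img_resized = _preprocess_yolo(cv_img, (416, 416))
        self.inputs[0].host = np.ascontiguousarray(img_resized)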
25 | 26 | # Args 27 | img: int8 numpy array of shape (img_h, img_w, 3) 28 | input_shape: a tuple of (H, W) 29 | 30 | # Returns 31 | preprocessed img: float32 numpy array of shape (3, H, W) 32 | """ 33 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 34 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 35 | img = img.transpose((2, 0, 1)).astype(np.float32) 36 | img /= 255.0 37 | return img 38 | 39 | 40 | def _nms_boxes(detections, nms_threshold): 41 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding 42 | boxes with their confidence scores and return an array with the 43 | indexes of the bounding boxes we want to keep. 44 | 45 | # Args 46 | detections: Nx7 numpy arrays of 47 | [[x, y, w, h, box_confidence, class_id, class_prob], 48 | ......] 49 | """ 50 | x_coord = detections[:, 0] 51 | y_coord = detections[:, 1] 52 | width = detections[:, 2] 53 | height = detections[:, 3] 54 | box_confidences = detections[:, 4] 55 | 56 | areas = width * height 57 | ordered = box_confidences.argsort()[::-1] 58 | 59 | keep = list() 60 | while ordered.size > 0: 61 | # Index of the current element: 62 | i = ordered[0] 63 | keep.append(i) 64 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 65 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 66 | xx2 = np.minimum(x_coord[i] + width[i], 67 | x_coord[ordered[1:]] + width[ordered[1:]]) 68 | yy2 = np.minimum(y_coord[i] + height[i], 69 | y_coord[ordered[1:]] + height[ordered[1:]]) 70 | 71 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 72 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 73 | intersection = width1 * height1 74 | union = (areas[i] + areas[ordered[1:]] - intersection) 75 | iou = intersection / union 76 | indexes = np.where(iou <= nms_threshold)[0] 77 | ordered = ordered[indexes + 1] 78 | 79 | keep = np.array(keep) 80 | return keep 81 | 82 | 83 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5): 84 | """Postprocess TensorRT outputs. 
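    Steps, as implemented below: concatenate the per-layer outputs into an Nx7
    array, keep rows whose box_confidence * class_prob >= conf_th, scale the
    normalized x, y, w, h to pixel coordinates, apply class-wise NMS, and return
    integer (x1, y1, x2, y2) boxes together with their scores and class ids.

    A minimal usage sketch, mirroring how TrtYOLO.detect() calls this helper:

        boxes, scores, classes = _postprocess_yolo(
            [out.host for out in outputs], img.shape[1], img.shape[0], conf_th)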
85 | 86 | # Args 87 | trt_outputs: a list of 2 or 3 tensors, where each tensor 88 | contains a multiple of 7 float32 numbers in 89 | the order of [x, y, w, h, box_confidence, class_id, class_prob] 90 | conf_th: confidence threshold 91 | 92 | # Returns 93 | boxes, scores, classes (after NMS) 94 | """ 95 | # concatenate outputs of all yolo layers 96 | detections = np.concatenate( 97 | [o.reshape(-1, 7) for o in trt_outputs], axis=0) 98 | 99 | # drop detections with score lower than conf_th 100 | box_scores = detections[:, 4] * detections[:, 6] 101 | pos = np.where(box_scores >= conf_th) 102 | detections = detections[pos] 103 | 104 | # scale x, y, w, h from [0, 1] to pixel values 105 | detections[:, 0] *= img_w 106 | detections[:, 1] *= img_h 107 | detections[:, 2] *= img_w 108 | detections[:, 3] *= img_h 109 | 110 | # NMS 111 | nms_detections = np.zeros((0, 7), dtype=detections.dtype) 112 | for class_id in set(detections[:, 5]): 113 | idxs = np.where(detections[:, 5] == class_id) 114 | cls_detections = detections[idxs] 115 | keep = _nms_boxes(cls_detections, nms_threshold) 116 | nms_detections = np.concatenate( 117 | [nms_detections, cls_detections[keep]], axis=0) 118 | if len(nms_detections) == 0: 119 | boxes = np.zeros((0, 4), dtype=np.int) 120 | scores = np.zeros((0, 1), dtype=np.float32) 121 | classes = np.zeros((0, 1), dtype=np.float32) 122 | else: 123 | xx = nms_detections[:, 0].reshape(-1, 1) 124 | yy = nms_detections[:, 1].reshape(-1, 1) 125 | ww = nms_detections[:, 2].reshape(-1, 1) 126 | hh = nms_detections[:, 3].reshape(-1, 1) 127 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5 128 | boxes = boxes.astype(np.int) 129 | scores = nms_detections[:, 4] * nms_detections[:, 6] 130 | classes = nms_detections[:, 5] 131 | return boxes, scores, classes 132 | 133 | 134 | class HostDeviceMem(object): 135 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 136 | 137 | def __init__(self, host_mem, device_mem): 138 | self.host = host_mem 139 | self.device = device_mem 140 | 141 | def __str__(self): 142 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 143 | 144 | def __repr__(self): 145 | return self.__str__() 146 | 147 | 148 | def allocate_buffers(engine, grid_sizes): 149 | """Allocates all host/device in/out buffers required for an engine. 150 | Checked! 151 | """ 152 | 153 | inputs = [] 154 | outputs = [] 155 | bindings = [] 156 | output_idx = 0 157 | stream = cuda.Stream() 158 | for binding in engine: 159 | size = trt.volume(engine.get_binding_shape(binding)) * \ 160 | engine.max_batch_size 161 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 162 | # Allocate host and device buffers 163 | host_mem = cuda.pagelocked_empty(size, dtype) 164 | device_mem = cuda.mem_alloc(host_mem.nbytes) 165 | 166 | # Append the device buffer to device bindings. 167 | bindings.append(int(device_mem)) 168 | 169 | # Append to the appropriate list. 
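            # Every binding gets one pagelocked host buffer plus one device buffer;
            # output bindings are additionally checked against the expected YOLO
            # output size. For example (assuming a 416x416 yolov4 engine),
            # grid_sizes is [52*52, 26*26, 13*13], so the three output bindings
            # hold 2704 / 676 / 169 cells * 3 anchors * 7 floats respectively.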
170 | if engine.binding_is_input(binding): 171 | inputs.append(HostDeviceMem(host_mem, device_mem)) 172 | 173 | else: 174 | # each grid has 3 anchors, each anchor generates a detection 175 | # output of 7 float32 values 176 | assert size == grid_sizes[output_idx] * \ 177 | 3 * 7 * engine.max_batch_size 178 | outputs.append(HostDeviceMem(host_mem, device_mem)) 179 | output_idx += 1 180 | 181 | return inputs, outputs, bindings, stream 182 | 183 | 184 | def get_yolo_grid_sizes(model_name, h, w): 185 | """Get grid sizes (w*h) for all yolo layers in the model.""" 186 | if 'yolov3' in model_name: 187 | if 'tiny' in model_name: 188 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)] 189 | else: 190 | return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)] 191 | elif 'yolov4' in model_name: 192 | if 'tiny' in model_name: 193 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)] 194 | else: 195 | return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)] 196 | else: 197 | raise ValueError('ERROR: unknown model (%s)!' % args.model) 198 | 199 | 200 | class TrtYOLO(object): 201 | """TrtYOLO class encapsulates things needed to run TRT YOLO.""" 202 | 203 | def _load_engine(self): 204 | TRTbin = '%s.trt' % self.model 205 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 206 | return runtime.deserialize_cuda_engine(f.read()) 207 | 208 | def __init__(self, model, input_shape, category_num=80, cuda_ctx=None): 209 | """Initialize TensorRT plugins, engine and context.""" 210 | self.model = model 211 | self.input_shape = input_shape 212 | self.category_num = category_num 213 | self.cuda_ctx = cuda.Device(0).make_context() 214 | self.trt_logger = trt.Logger(trt.Logger.INFO) 215 | self.engine = self._load_engine() 216 | self.context = self.engine.create_execution_context() 217 | grid_sizes = get_yolo_grid_sizes( 218 | self.model, self.input_shape[0], self.input_shape[1]) 219 | self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers( 220 | self.engine, grid_sizes) 221 | 222 | def __del__(self): 223 | """Free CUDA memories.""" 224 | del self.outputs 225 | del self.inputs 226 | del self.stream 227 | 228 | def allocate_buffers(self, engine, grid_sizes): 229 | """Allocates all host/device in/out buffers required for an engine. 230 | Checked! 231 | """ 232 | 233 | inputs = [] 234 | outputs = [] 235 | bindings = [] 236 | output_idx = 0 237 | stream = cuda.Stream() 238 | 239 | for binding in engine: 240 | size = trt.volume(engine.get_binding_shape( 241 | binding)) * engine.max_batch_size 242 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 243 | # Allocate host and device buffers 244 | host_mem = cuda.pagelocked_empty(size, dtype) 245 | device_mem = cuda.mem_alloc(host_mem.nbytes) 246 | 247 | # Append the device buffer to device bindings. 248 | bindings.append(int(device_mem)) 249 | 250 | # Append to the appropriate list. 
251 | if engine.binding_is_input(binding): 252 | inputs.append(HostDeviceMem(host_mem, device_mem)) 253 | 254 | else: 255 | # each grid has 3 anchors, each anchor generates a detection 256 | # output of 7 float32 values 257 | assert size == grid_sizes[output_idx] * \ 258 | 3 * 7 * engine.max_batch_size 259 | outputs.append(HostDeviceMem(host_mem, device_mem)) 260 | output_idx += 1 261 | 262 | return inputs, outputs, bindings, stream 263 | 264 | def detect(self, img, conf_th=0.3): 265 | """ Detect objects in the input image """ 266 | 267 | img_resized = _preprocess_yolo(img, self.input_shape) 268 | trt_outputs = [] 269 | 270 | # Set host input to the image. The do_inference() function 271 | # will copy the input to the GPU before executing. 272 | self.inputs[0].host = np.ascontiguousarray(img_resized) 273 | 274 | if self.cuda_ctx: 275 | self.cuda_ctx.push() 276 | 277 | self.bindings = [int(i) for i in self.bindings] 278 | 279 | context = self.context 280 | bindings = self.bindings 281 | inputs = self.inputs 282 | outputs = self.outputs 283 | stream = self.stream 284 | 285 | if trt.__version__[0] < '7': 286 | # Transfer input data to the GPU. 287 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 288 | # Run inference. 289 | context.execute_async(batch_size=1, 290 | bindings=bindings, 291 | stream_handle=stream.handle) 292 | # Transfer predictions back from the GPU. 293 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 294 | # Synchronize the stream 295 | stream.synchronize() 296 | # Return only the host outputs. 297 | else: 298 | # Transfer input data to the GPU. 299 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) 300 | for inp in inputs] 301 | # Run inference. 302 | context.execute_async_v2( 303 | bindings=self.bindings, stream_handle=stream.handle) 304 | # Transfer predictions back from the GPU. 305 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) 306 | for out in outputs] 307 | # Synchronize the stream 308 | stream.synchronize() 309 | 310 | trt_outputs = [out.host for out in outputs] 311 | 312 | del context 313 | 314 | if self.cuda_ctx: 315 | self.cuda_ctx.pop() 316 | 317 | boxes, scores, classes = _postprocess_yolo( 318 | trt_outputs, img.shape[1], img.shape[0], conf_th) 319 | 320 | # clip x1, y1, x2, y2 within original image 321 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 322 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 323 | return boxes, scores, classes 324 | -------------------------------------------------------------------------------- /utils/yolo_with_plugins_batch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import ctypes 4 | 5 | import rospkg 6 | import numpy as np 7 | import cv2 8 | import tensorrt as trt 9 | import pycuda.driver as cuda 10 | import pycuda.autoinit 11 | from multiprocessing import Process 12 | 13 | rospack = rospkg.RosPack() 14 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so" 15 | 16 | try: 17 | ctypes.cdll.LoadLibrary(plugin_path) 18 | except OSError as e: 19 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. ' 20 | 'Did you forget to do a "make" in the "./plugins/" ' 21 | 'subdirectory?') 22 | 23 | 24 | def _preprocess_yolo(img, input_shape): 25 | """Preprocess an image before TRT YOLO inferencing. 
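    Same preprocessing as in utils/yolo_with_plugins.py (resize, BGR -> RGB,
    HWC -> CHW, scale to [0, 1]); each of TrtYOLO.detect1()-detect4() runs it
    on a single camera frame before inference on the shared engine.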
26 | 27 | # Args 28 | img: int8 numpy array of shape (img_h, img_w, 3) 29 | input_shape: a tuple of (H, W) 30 | 31 | # Returns 32 | preprocessed img: float32 numpy array of shape (3, H, W) 33 | """ 34 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 35 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 36 | img = img.transpose((2, 0, 1)).astype(np.float32) 37 | img /= 255.0 38 | return img 39 | 40 | 41 | def _nms_boxes(detections, nms_threshold): 42 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding 43 | boxes with their confidence scores and return an array with the 44 | indexes of the bounding boxes we want to keep. 45 | 46 | # Args 47 | detections: Nx7 numpy arrays of 48 | [[x, y, w, h, box_confidence, class_id, class_prob], 49 | ......] 50 | """ 51 | x_coord = detections[:, 0] 52 | y_coord = detections[:, 1] 53 | width = detections[:, 2] 54 | height = detections[:, 3] 55 | box_confidences = detections[:, 4] 56 | 57 | areas = width * height 58 | ordered = box_confidences.argsort()[::-1] 59 | 60 | keep = list() 61 | while ordered.size > 0: 62 | # Index of the current element: 63 | i = ordered[0] 64 | keep.append(i) 65 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 66 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 67 | xx2 = np.minimum(x_coord[i] + width[i], 68 | x_coord[ordered[1:]] + width[ordered[1:]]) 69 | yy2 = np.minimum(y_coord[i] + height[i], 70 | y_coord[ordered[1:]] + height[ordered[1:]]) 71 | 72 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 73 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 74 | intersection = width1 * height1 75 | union = (areas[i] + areas[ordered[1:]] - intersection) 76 | iou = intersection / union 77 | indexes = np.where(iou <= nms_threshold)[0] 78 | ordered = ordered[indexes + 1] 79 | 80 | keep = np.array(keep) 81 | return keep 82 | 83 | 84 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5): 85 | """Postprocess TensorRT outputs. 
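    Shared by TrtYOLO.detect1()-detect4(), which pass in the host outputs of
    the engine together with the original frame width and height.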
86 | 87 | # Args 88 | trt_outputs: a list of 2 or 3 tensors, where each tensor 89 | contains a multiple of 7 float32 numbers in 90 | the order of [x, y, w, h, box_confidence, class_id, class_prob] 91 | conf_th: confidence threshold 92 | 93 | # Returns 94 | boxes, scores, classes (after NMS) 95 | """ 96 | # concatenate outputs of all yolo layers 97 | detections = np.concatenate( 98 | [o.reshape(-1, 7) for o in trt_outputs], axis=0) 99 | 100 | # drop detections with score lower than conf_th 101 | box_scores = detections[:, 4] * detections[:, 6] 102 | pos = np.where(box_scores >= conf_th) 103 | detections = detections[pos] 104 | 105 | # scale x, y, w, h from [0, 1] to pixel values 106 | detections[:, 0] *= img_w 107 | detections[:, 1] *= img_h 108 | detections[:, 2] *= img_w 109 | detections[:, 3] *= img_h 110 | 111 | # NMS 112 | nms_detections = np.zeros((0, 7), dtype=detections.dtype) 113 | for class_id in set(detections[:, 5]): 114 | idxs = np.where(detections[:, 5] == class_id) 115 | cls_detections = detections[idxs] 116 | keep = _nms_boxes(cls_detections, nms_threshold) 117 | nms_detections = np.concatenate( 118 | [nms_detections, cls_detections[keep]], axis=0) 119 | if len(nms_detections) == 0: 120 | boxes = np.zeros((0, 4), dtype=np.int) 121 | scores = np.zeros((0, 1), dtype=np.float32) 122 | classes = np.zeros((0, 1), dtype=np.float32) 123 | else: 124 | xx = nms_detections[:, 0].reshape(-1, 1) 125 | yy = nms_detections[:, 1].reshape(-1, 1) 126 | ww = nms_detections[:, 2].reshape(-1, 1) 127 | hh = nms_detections[:, 3].reshape(-1, 1) 128 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5 129 | boxes = boxes.astype(np.int) 130 | scores = nms_detections[:, 4] * nms_detections[:, 6] 131 | classes = nms_detections[:, 5] 132 | return boxes, scores, classes 133 | 134 | 135 | class HostDeviceMem(object): 136 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 137 | 138 | def __init__(self, host_mem, device_mem): 139 | self.host = host_mem 140 | self.device = device_mem 141 | 142 | def __str__(self): 143 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 144 | 145 | def __repr__(self): 146 | return self.__str__() 147 | 148 | 149 | def allocate_buffers(engine, grid_sizes): 150 | """Allocates all host/device in/out buffers required for an engine. 151 | Checked! 152 | """ 153 | 154 | inputs = [] 155 | outputs = [] 156 | bindings = [] 157 | output_idx = 0 158 | stream = cuda.Stream() 159 | for binding in engine: 160 | size = trt.volume(engine.get_binding_shape(binding)) * \ 161 | engine.max_batch_size 162 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 163 | # Allocate host and device buffers 164 | host_mem = cuda.pagelocked_empty(size, dtype) 165 | device_mem = cuda.mem_alloc(host_mem.nbytes) 166 | 167 | # Append the device buffer to device bindings. 168 | bindings.append(int(device_mem)) 169 | 170 | # Append to the appropriate list. 
171 | if engine.binding_is_input(binding): 172 | inputs.append(HostDeviceMem(host_mem, device_mem)) 173 | 174 | else: 175 | # each grid has 3 anchors, each anchor generates a detection 176 | # output of 7 float32 values 177 | assert size == grid_sizes[output_idx] * \ 178 | 3 * 7 * engine.max_batch_size 179 | outputs.append(HostDeviceMem(host_mem, device_mem)) 180 | output_idx += 1 181 | 182 | return inputs, outputs, bindings, stream 183 | 184 | 185 | def get_yolo_grid_sizes(model_name, h, w): 186 | """Get grid sizes (w*h) for all yolo layers in the model.""" 187 | if 'yolov3' in model_name: 188 | if 'tiny' in model_name: 189 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)] 190 | else: 191 | return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)] 192 | elif 'yolov4' in model_name: 193 | if 'tiny' in model_name: 194 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)] 195 | else: 196 | return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)] 197 | else: 198 | raise ValueError('ERROR: unknown model (%s)!' % args.model) 199 | 200 | 201 | class TrtYOLO(object): 202 | """TrtYOLO class encapsulates things needed to run TRT YOLO.""" 203 | 204 | def _load_engine(self): 205 | TRTbin = '%s.trt' % self.model 206 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 207 | return runtime.deserialize_cuda_engine(f.read()) 208 | 209 | def __init__(self, model, input_shape, category_num=80, cuda_ctx=None): 210 | """Initialize TensorRT plugins, engine and context.""" 211 | self.model = model 212 | self.input_shape = input_shape 213 | self.category_num = category_num 214 | self.cuda_ctx = cuda.Device(0).make_context() 215 | self.trt_logger = trt.Logger(trt.Logger.INFO) 216 | self.engine = self._load_engine() 217 | self.context = self.engine.create_execution_context() 218 | grid_sizes = get_yolo_grid_sizes( 219 | self.model, self.input_shape[0], self.input_shape[1]) 220 | self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers( 221 | self.engine, grid_sizes) 222 | self.overall_boxes = [] 223 | self.overall_scores = [] 224 | self.overall_classes = [] 225 | 226 | def __del__(self): 227 | """Free CUDA memories.""" 228 | del self.outputs 229 | del self.inputs 230 | del self.stream 231 | 232 | def allocate_buffers(self, engine, grid_sizes): 233 | """Allocates all host/device in/out buffers required for an engine. 234 | Checked! 235 | """ 236 | 237 | inputs = [] 238 | outputs = [] 239 | bindings = [] 240 | output_idx = 0 241 | stream = cuda.Stream() 242 | 243 | for binding in engine: 244 | size = trt.volume(engine.get_binding_shape( 245 | binding)) * engine.max_batch_size 246 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 247 | # Allocate host and device buffers 248 | host_mem = cuda.pagelocked_empty(size, dtype) 249 | device_mem = cuda.mem_alloc(host_mem.nbytes) 250 | 251 | # Append the device buffer to device bindings. 252 | bindings.append(int(device_mem)) 253 | 254 | # Append to the appropriate list. 
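            # These input/output buffer lists are created once in __init__ and then
            # reused by every detect1()-detect4() call; the four per-camera entry
            # points share the same engine, execution context, stream and buffers.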
255 | if engine.binding_is_input(binding): 256 | inputs.append(HostDeviceMem(host_mem, device_mem)) 257 | 258 | else: 259 | # each grid has 3 anchors, each anchor generates a detection 260 | # output of 7 float32 values 261 | assert size == grid_sizes[output_idx] * \ 262 | 3 * 7 * engine.max_batch_size 263 | outputs.append(HostDeviceMem(host_mem, device_mem)) 264 | output_idx += 1 265 | 266 | return inputs, outputs, bindings, stream 267 | 268 | def detect1(self, img, conf_th=0.3): 269 | """ Detect objects in the input image """ 270 | 271 | img_resized = _preprocess_yolo(img, self.input_shape) 272 | trt_outputs = [] 273 | 274 | # Set host input to the image. The do_inference() function 275 | # will copy the input to the GPU before executing. 276 | self.inputs[0].host = np.ascontiguousarray(img_resized) 277 | 278 | # if self.cuda_ctx: 279 | self.cuda_ctx.push() 280 | 281 | self.bindings = [int(i) for i in self.bindings] 282 | 283 | context = self.context 284 | bindings = self.bindings 285 | inputs = self.inputs 286 | outputs = self.outputs 287 | stream = self.stream 288 | 289 | # Transfer input data to the GPU. 290 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) 291 | for inp in inputs] 292 | # Run inference. 293 | context.execute_async_v2( 294 | bindings=self.bindings, stream_handle=stream.handle) 295 | # Transfer predictions back from the GPU. 296 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) 297 | for out in outputs] 298 | # Synchronize the stream 299 | stream.synchronize() 300 | 301 | trt_outputs = [out.host for out in outputs] 302 | 303 | del context 304 | 305 | # if self.cuda_ctx: 306 | self.cuda_ctx.pop() 307 | 308 | boxes, scores, classes = _postprocess_yolo( 309 | trt_outputs, img.shape[1], img.shape[0], conf_th) 310 | 311 | # clip x1, y1, x2, y2 within original image 312 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 313 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 314 | return boxes, scores, classes 315 | 316 | def detect2(self, img, conf_th=0.3): 317 | """ Detect objects in the input image """ 318 | 319 | img_resized = _preprocess_yolo(img, self.input_shape) 320 | trt_outputs = [] 321 | 322 | # Set host input to the image. The do_inference() function 323 | # will copy the input to the GPU before executing. 324 | self.inputs[0].host = np.ascontiguousarray(img_resized) 325 | 326 | # if self.cuda_ctx: 327 | self.cuda_ctx.push() 328 | 329 | self.bindings = [int(i) for i in self.bindings] 330 | 331 | context = self.context 332 | bindings = self.bindings 333 | inputs = self.inputs 334 | outputs = self.outputs 335 | stream = self.stream 336 | 337 | # Transfer input data to the GPU. 338 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) 339 | for inp in inputs] 340 | # Run inference. 341 | context.execute_async_v2( 342 | bindings=self.bindings, stream_handle=stream.handle) 343 | # Transfer predictions back from the GPU. 
344 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) 345 | for out in outputs] 346 | # Synchronize the stream 347 | stream.synchronize() 348 | 349 | trt_outputs = [out.host for out in outputs] 350 | 351 | del context 352 | 353 | # if self.cuda_ctx: 354 | self.cuda_ctx.pop() 355 | 356 | boxes, scores, classes = _postprocess_yolo( 357 | trt_outputs, img.shape[1], img.shape[0], conf_th) 358 | 359 | # clip x1, y1, x2, y2 within original image 360 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 361 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 362 | return boxes, scores, classes 363 | 364 | def detect3(self, img, conf_th=0.3): 365 | """ Detect objects in the input image """ 366 | 367 | img_resized = _preprocess_yolo(img, self.input_shape) 368 | trt_outputs = [] 369 | 370 | # Set host input to the image. The do_inference() function 371 | # will copy the input to the GPU before executing. 372 | self.inputs[0].host = np.ascontiguousarray(img_resized) 373 | 374 | # if self.cuda_ctx: 375 | self.cuda_ctx.push() 376 | 377 | self.bindings = [int(i) for i in self.bindings] 378 | 379 | context = self.context 380 | bindings = self.bindings 381 | inputs = self.inputs 382 | outputs = self.outputs 383 | stream = self.stream 384 | 385 | # Transfer input data to the GPU. 386 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) 387 | for inp in inputs] 388 | # Run inference. 389 | context.execute_async_v2( 390 | bindings=self.bindings, stream_handle=stream.handle) 391 | # Transfer predictions back from the GPU. 392 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) 393 | for out in outputs] 394 | # Synchronize the stream 395 | stream.synchronize() 396 | 397 | trt_outputs = [out.host for out in outputs] 398 | 399 | del context 400 | 401 | # if self.cuda_ctx: 402 | self.cuda_ctx.pop() 403 | 404 | boxes, scores, classes = _postprocess_yolo( 405 | trt_outputs, img.shape[1], img.shape[0], conf_th) 406 | 407 | # clip x1, y1, x2, y2 within original image 408 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 409 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 410 | return boxes, scores, classes 411 | 412 | def detect4(self, img, conf_th=0.3): 413 | """ Detect objects in the input image """ 414 | 415 | img_resized = _preprocess_yolo(img, self.input_shape) 416 | trt_outputs = [] 417 | 418 | # Set host input to the image. The do_inference() function 419 | # will copy the input to the GPU before executing. 420 | self.inputs[0].host = np.ascontiguousarray(img_resized) 421 | 422 | # if self.cuda_ctx: 423 | self.cuda_ctx.push() 424 | 425 | self.bindings = [int(i) for i in self.bindings] 426 | 427 | context = self.context 428 | bindings = self.bindings 429 | inputs = self.inputs 430 | outputs = self.outputs 431 | stream = self.stream 432 | 433 | # Transfer input data to the GPU. 434 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) 435 | for inp in inputs] 436 | # Run inference. 437 | context.execute_async_v2( 438 | bindings=self.bindings, stream_handle=stream.handle) 439 | # Transfer predictions back from the GPU. 
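
The detectN() methods are near-identical single-image entry points, so any one of them can be used on its own. A minimal usage sketch for detect1() follows; it assumes the module has been imported normally (so its own imports and the yolo plugin library are loaded), that a built yolov4-416.trt sits in the current directory, and that test.jpg is just a placeholder image path:

```
import cv2

# Minimal usage sketch (illustrative, not part of the original file).
trt_yolo = TrtYOLO('yolov4-416', (416, 416), category_num=80)
img = cv2.imread('test.jpg')                      # BGR image of any size
boxes, scores, classes = trt_yolo.detect1(img, conf_th=0.3)
for (x1, y1, x2, y2), score, cls in zip(boxes, scores, classes):
    print('class %d, score %.2f, box (%d, %d, %d, %d)'
          % (cls, score, x1, y1, x2, y2))
```
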
440 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) 441 | for out in outputs] 442 | # Synchronize the stream 443 | stream.synchronize() 444 | 445 | trt_outputs = [out.host for out in outputs] 446 | 447 | del context 448 | 449 | # if self.cuda_ctx: 450 | self.cuda_ctx.pop() 451 | 452 | boxes, scores, classes = _postprocess_yolo( 453 | trt_outputs, img.shape[1], img.shape[0], conf_th) 454 | 455 | # clip x1, y1, x2, y2 within original image 456 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 457 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 458 | return boxes, scores, classes 459 | -------------------------------------------------------------------------------- /yolo/__pycache__/plugins.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/yolo/__pycache__/plugins.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/convert_yolo_trt.sh: -------------------------------------------------------------------------------- 1 | echo "YOLOv3 or YOLOv4. Input 3 or 4" 2 | read model_type 3 | 4 | echo "Do you want to download the model? [Y/N]" 5 | read download 6 | 7 | if [[ $download == 'Y' ]] 8 | then 9 | if [[ $model_type == 3 ]] 10 | then 11 | wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber 12 | wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber 13 | else 14 | wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber 15 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber 16 | fi 17 | fi 18 | 19 | echo "What is the input shape? 
Input 288 or 416 or 608" 20 | read input_shape 21 | 22 | if [[ $model_type == 3 ]] 23 | then 24 | if [[ $input_shape == 288 ]] 25 | then 26 | echo "Creating yolov3-288.cfg and yolov3-288.weights" 27 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov3-288.cfg 28 | ln -sf yolov3.weights yolov3-288.weights 29 | fi 30 | if [[ $input_shape == 416 ]] 31 | then 32 | echo "Creating yolov3-416.cfg and yolov3-416.weights" 33 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov3-416.cfg 34 | ln -sf yolov3.weights yolov3-416.weights 35 | fi 36 | if [[ $input_shape == 608 ]] 37 | then 38 | echo "Creating yolov3-608.cfg and yolov3-608.weights" 39 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' > yolov3-608.cfg 40 | ln -sf yolov3.weights yolov3-608.weights 41 | fi 42 | else 43 | if [[ $input_shape == 288 ]] 44 | then 45 | echo "Creating yolov4-288.cfg and yolov4-288.weights" 46 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg 47 | ln -sf yolov4.weights yolov4-288.weights 48 | fi 49 | if [[ $input_shape == 416 ]] 50 | then 51 | echo "Creating yolov4-416.cfg and yolov4-416.weights" 52 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg 53 | ln -sf yolov4.weights yolov4-416.weights 54 | fi 55 | if [[ $input_shape == 608 ]] 56 | then 57 | echo "Creating yolov4-608.cfg and yolov4-608.weights" 58 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg 59 | ln -sf yolov4.weights yolov4-608.weights 60 | fi 61 | fi 62 | 63 | echo "How many categories are there?" 64 | read category_num 65 | model_name="yolov${model_type}-${input_shape}" 66 | 67 | # convert from yolo to onnx 68 | python3 yolo_to_onnx.py -m $model_name -c $category_num 69 | 70 | echo "Done converting to .onnx" 71 | echo "..." 72 | echo "Now converting to .trt" 73 | 74 | # convert from onnx to trt 75 | python3 onnx_to_tensorrt.py -m $model_name -c $category_num --verbose 76 | 77 | echo "Conversion from yolo to trt done!" 
78 | -------------------------------------------------------------------------------- /yolo/download_yolo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # yolov3-tiny 6 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg -q --show-progress --no-clobber 7 | #wget https://pjreddie.com/media/files/yolov3-tiny.weights -q --show-progress --no-clobber 8 | 9 | # yolov3 10 | #wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber 11 | #wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber 12 | 13 | # yolov3-spp 14 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg -q --show-progress --no-clobber 15 | #wget https://pjreddie.com/media/files/yolov3-spp.weights -q --show-progress --no-clobber 16 | 17 | # yolov4-tiny 18 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg -q --show-progress --no-clobber 19 | #wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights -q --show-progress --no-clobber 20 | 21 | # yolov4 22 | wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber 23 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber 24 | 25 | #echo 26 | #echo "Creating yolov3-tiny-288.cfg and yolov3-tiny-288.weights" 27 | #cat yolov3-tiny.cfg | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov3-tiny-288.cfg 28 | #echo >> yolov3-tiny-288.cfg 29 | #ln -sf yolov3-tiny.weights yolov3-tiny-288.weights 30 | #echo "Creating yolov3-tiny-416.cfg and yolov3-tiny-416.weights" 31 | #cp yolov3-tiny.cfg yolov3-tiny-416.cfg 32 | #echo >> yolov3-tiny-416.cfg 33 | #ln -sf yolov3-tiny.weights yolov3-tiny-416.weights 34 | 35 | #echo "Creating yolov3-288.cfg and yolov3-288.weights" 36 | #cat yolov3.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-288.cfg 37 | #ln -sf yolov3.weights yolov3-288.weights 38 | #echo "Creating yolov3-416.cfg and yolov3-416.weights" 39 | #cat yolov3.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-416.cfg 40 | #ln -sf yolov3.weights yolov3-416.weights 41 | #echo "Creating yolov3-608.cfg and yolov3-608.weights" 42 | #cp yolov3.cfg yolov3-608.cfg 43 | #ln -sf yolov3.weights yolov3-608.weights 44 | 45 | #echo "Creating yolov3-spp-288.cfg and yolov3-spp-288.weights" 46 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-spp-288.cfg 47 | #ln -sf yolov3-spp.weights yolov3-spp-288.weights 48 | #echo "Creating yolov3-spp-416.cfg and yolov3-spp-416.weights" 49 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-spp-416.cfg 50 | #ln -sf yolov3-spp.weights yolov3-spp-416.weights 51 | #echo "Creating yolov3-spp-608.cfg and yolov3-spp-608.weights" 52 | #cp yolov3-spp.cfg yolov3-spp-608.cfg 53 | #ln -sf yolov3-spp.weights yolov3-spp-608.weights 54 | 55 | #echo "Creating yolov4-tiny-288.cfg and yolov4-tiny-288.weights" 56 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov4-tiny-288.cfg 57 | #echo >> yolov4-tiny-288.cfg 58 | #ln -sf yolov4-tiny.weights yolov4-tiny-288.weights 59 | #echo "Creating 
yolov4-tiny-416.cfg and yolov4-tiny-416.weights" 60 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' > yolov4-tiny-416.cfg 61 | #echo >> yolov4-tiny-416.cfg 62 | #ln -sf yolov4-tiny.weights yolov4-tiny-416.weights 63 | 64 | #echo "Creating yolov4-288.cfg and yolov4-288.weights" 65 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg 66 | #ln -sf yolov4.weights yolov4-288.weights 67 | echo "Creating yolov4-416.cfg and yolov4-416.weights" 68 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg 69 | ln -sf yolov4.weights yolov4-416.weights 70 | #echo "Creating yolov4-608.cfg and yolov4-608.weights" 71 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg 72 | #ln -sf yolov4.weights yolov4-608.weights 73 | 74 | #echo 75 | #echo "Done." 76 | 77 | -------------------------------------------------------------------------------- /yolo/onnx_to_tensorrt.py: -------------------------------------------------------------------------------- 1 | # onnx_to_tensorrt.py 2 | # 3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. 
Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | 52 | from __future__ import print_function 53 | 54 | import os 55 | import argparse 56 | 57 | import tensorrt as trt 58 | 59 | from plugins import add_yolo_plugins 60 | 61 | 62 | EXPLICIT_BATCH = [] 63 | if trt.__version__[0] >= '7': 64 | EXPLICIT_BATCH.append( 65 | 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 66 | 67 | 68 | def build_engine(onnx_file_path, category_num=80, verbose=False): 69 | """Build a TensorRT engine from an ONNX file.""" 70 | TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger() 71 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 72 | builder.max_workspace_size = 1 << 28 73 | builder.max_batch_size = 1 74 | builder.fp16_mode = True 75 | #builder.strict_type_constraints = True 76 | 77 | # Parse model file 78 | print('Loading ONNX file from path {}...'.format(onnx_file_path)) 79 | with open(onnx_file_path, 'rb') as model: 80 | if not parser.parse(model.read()): 81 | print('ERROR: Failed to parse the ONNX file.') 82 | for error in range(parser.num_errors): 83 | print(parser.get_error(error)) 84 | return None 85 | if trt.__version__[0] >= '7': 86 | # The actual yolo*.onnx is generated with batch size 64. 87 | # Reshape input to batch size 1 88 | shape = list(network.get_input(0).shape) 89 | shape[0] = 1 90 | network.get_input(0).shape = shape 91 | 92 | print('Adding yolo_layer plugins...') 93 | model_name = onnx_file_path[:-5] 94 | network = add_yolo_plugins( 95 | network, model_name, category_num, TRT_LOGGER) 96 | 97 | print('Building an engine. This would take a while...') 98 | print('(Use "--verbose" to enable verbose logging.)') 99 | engine = builder.build_cuda_engine(network) 100 | print('Completed creating engine.') 101 | return engine 102 | 103 | 104 | def main(): 105 | """Create a TensorRT engine for ONNX-based YOLO.""" 106 | parser = argparse.ArgumentParser() 107 | parser.add_argument( 108 | '-v', '--verbose', action='store_true', 109 | help='enable verbose output (for debugging)') 110 | parser.add_argument( 111 | '-c', '--category_num', type=int, default=80, 112 | help='number of object categories [80]') 113 | parser.add_argument( 114 | '-m', '--model', type=str, required=True, 115 | help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-' 116 | '[{dimension}], where dimension could be a single ' 117 | 'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)')) 118 | args = parser.parse_args() 119 | 120 | onnx_file_path = '%s.onnx' % args.model 121 | if not os.path.isfile(onnx_file_path): 122 | raise SystemExit('ERROR: file (%s) not found! You might want to run yolo_to_onnx.py first to generate it.' 
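
build_engine() above can also be driven directly from Python rather than through the command line. A small sketch, assuming yolov4-416.onnx has already been produced by yolo_to_onnx.py in the current directory (this mirrors what main() below does for `-m yolov4-416 -c 80`):

```
# Hedged sketch: build and serialize the engine for a yolov4-416 ONNX model.
engine = build_engine('yolov4-416.onnx', category_num=80, verbose=False)
if engine is None:
    raise SystemExit('ERROR: failed to build the TensorRT engine')
with open('yolov4-416.trt', 'wb') as f:
    f.write(engine.serialize())
```
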
% onnx_file_path) 123 | engine_file_path = '%s.trt' % args.model 124 | engine = build_engine(onnx_file_path, args.category_num, args.verbose) 125 | with open(engine_file_path, 'wb') as f: 126 | f.write(engine.serialize()) 127 | print('Serialized the TensorRT engine to file: %s' % engine_file_path) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /yolo/plugins.py: -------------------------------------------------------------------------------- 1 | """plugins.py 2 | 3 | I referenced the code from https://github.com/dongfangduoshou123/YoloV3-TensorRT/blob/master/seralizeEngineFromPythonAPI.py 4 | """ 5 | 6 | 7 | import ctypes 8 | 9 | import numpy as np 10 | import tensorrt as trt 11 | 12 | try: 13 | ctypes.cdll.LoadLibrary('../plugins/libyolo_layer.so') 14 | except OSError as e: 15 | raise SystemExit('ERROR: failed to load ../plugins/libyolo_layer.so. ' 16 | 'Did you forget to do a "make" in the "../plugins/" ' 17 | 'subdirectory?') 18 | 19 | 20 | def get_input_wh(model_name): 21 | """Get input_width and input_height of the model.""" 22 | yolo_dim = model_name.split('-')[-1] 23 | if 'x' in yolo_dim: 24 | dim_split = yolo_dim.split('x') 25 | if len(dim_split) != 2: 26 | raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim) 27 | w, h = int(dim_split[0]), int(dim_split[1]) 28 | else: 29 | h = w = int(yolo_dim) 30 | if h % 32 != 0 or w % 32 != 0: 31 | raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim) 32 | return w, h 33 | 34 | 35 | def get_yolo_whs(model_name, w, h): 36 | """Get yolo_width and yolo_height for all yolo layers in the model.""" 37 | if 'yolov3' in model_name: 38 | if 'tiny' in model_name: 39 | return [[w // 32, h // 32], [w // 16, h // 16]] 40 | else: 41 | return [[w // 32, h // 32], [w // 16, h // 16], [w // 8, h // 8]] 42 | elif 'yolov4' in model_name: 43 | if 'tiny' in model_name: 44 | return [[w // 32, h // 32], [w // 16, h // 16]] 45 | else: 46 | return [[w // 8, h // 8], [w // 16, h // 16], [w // 32, h // 32]] 47 | else: 48 | raise ValueError('ERROR: unknown model (%s)!' % args.model) 49 | 50 | 51 | def verify_classes(model_name, num_classes): 52 | """Verify 'classes=??' 
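
A quick worked example of what the two helpers above return for the model-name conventions used in this package. Note that importing this module also loads ../plugins/libyolo_layer.so, so the plugins library must have been built first:

```
# Illustrative values for the helpers defined above.
print(get_input_wh('yolov4-416'))            # (416, 416)
print(get_input_wh('yolov4-416x256'))        # (416, 256)
# The three yolo layers of a (non-tiny) yolov4 at 416x416, as [w, h] grids:
print(get_yolo_whs('yolov4-416', 416, 416))  # [[52, 52], [26, 26], [13, 13]]
```
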
in cfg matches user-specified num_classes.""" 53 | cfg_file_path = model_name + '.cfg' 54 | with open(cfg_file_path, 'r') as f: 55 | cfg_lines = f.readlines() 56 | classes_lines = [l.strip() for l in cfg_lines if l.startswith('classes')] 57 | classes = [int(l.split('=')[-1]) for l in classes_lines] 58 | return all([c == num_classes for c in classes]) 59 | 60 | 61 | def get_anchors(model_name): 62 | """Get anchors of all yolo layers from the cfg file.""" 63 | cfg_file_path = model_name + '.cfg' 64 | with open(cfg_file_path, 'r') as f: 65 | cfg_lines = f.readlines() 66 | yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')] 67 | mask_lines = [l.strip() for l in cfg_lines if l.startswith('mask')] 68 | anch_lines = [l.strip() for l in cfg_lines if l.startswith('anchors')] 69 | assert len(mask_lines) == len(yolo_lines) 70 | assert len(anch_lines) == len(yolo_lines) 71 | anchor_list = eval('[%s]' % anch_lines[0].split('=')[-1]) 72 | mask_strs = [l.split('=')[-1] for l in mask_lines] 73 | masks = [eval('[%s]' % s) for s in mask_strs] 74 | anchors = [] 75 | for mask in masks: 76 | curr_anchors = [] 77 | for m in mask: 78 | curr_anchors.append(anchor_list[m * 2]) 79 | curr_anchors.append(anchor_list[m * 2 + 1]) 80 | anchors.append(curr_anchors) 81 | return anchors 82 | 83 | 84 | def get_scales(model_name): 85 | """Get scale_x_y's of all yolo layers from the cfg file.""" 86 | cfg_file_path = model_name + '.cfg' 87 | with open(cfg_file_path, 'r') as f: 88 | cfg_lines = f.readlines() 89 | yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')] 90 | scale_lines = [l.strip() for l in cfg_lines if l.startswith('scale_x_y')] 91 | if len(scale_lines) == 0: 92 | return [1.0] * len(yolo_lines) 93 | else: 94 | assert len(scale_lines) == len(yolo_lines) 95 | return [float(l.split('=')[-1]) for l in scale_lines] 96 | 97 | 98 | def get_plugin_creator(plugin_name, logger): 99 | """Get the TensorRT plugin creator.""" 100 | trt.init_libnvinfer_plugins(logger, '') 101 | plugin_creator_list = trt.get_plugin_registry().plugin_creator_list 102 | for c in plugin_creator_list: 103 | if c.name == plugin_name: 104 | return c 105 | return None 106 | 107 | 108 | def add_yolo_plugins(network, model_name, num_classes, logger): 109 | """Add yolo plugins into a TensorRT network.""" 110 | input_width, input_height = get_input_wh(model_name) 111 | yolo_whs = get_yolo_whs(model_name, input_width, input_height) 112 | if not verify_classes(model_name, num_classes): 113 | raise ValueError('bad num_classes (%d)' % num_classes) 114 | anchors = get_anchors(model_name) 115 | if len(anchors) != len(yolo_whs): 116 | raise ValueError('bad number of yolo layers: %d vs. %d' % 117 | (len(anchors), len(yolo_whs))) 118 | if network.num_outputs != len(anchors): 119 | raise ValueError('bad number of network outputs: %d vs. 
%d' % 120 | (network.num_outputs, len(anchors))) 121 | scales = get_scales(model_name) 122 | if any([s < 1.0 for s in scales]): 123 | raise ValueError('bad scale_x_y: %s' % str(scales)) 124 | 125 | plugin_creator = get_plugin_creator('YoloLayer_TRT', logger) 126 | if not plugin_creator: 127 | raise RuntimeError('cannot get YoloLayer_TRT plugin creator') 128 | old_tensors = [network.get_output(i) for i in range(network.num_outputs)] 129 | new_tensors = [None] * network.num_outputs 130 | for i, old_tensor in enumerate(old_tensors): 131 | new_tensors[i] = network.add_plugin_v2( 132 | [old_tensor], 133 | plugin_creator.create_plugin('YoloLayer_TRT', trt.PluginFieldCollection([ 134 | trt.PluginField("yoloWidth", np.array(yolo_whs[i][0], dtype=np.int32), trt.PluginFieldType.INT32), 135 | trt.PluginField("yoloHeight", np.array(yolo_whs[i][1], dtype=np.int32), trt.PluginFieldType.INT32), 136 | trt.PluginField("inputWidth", np.array(input_width, dtype=np.int32), trt.PluginFieldType.INT32), 137 | trt.PluginField("inputHeight", np.array(input_height, dtype=np.int32), trt.PluginFieldType.INT32), 138 | trt.PluginField("numClasses", np.array(num_classes, dtype=np.int32), trt.PluginFieldType.INT32), 139 | trt.PluginField("numAnchors", np.array(len(anchors[i]) // 2, dtype=np.int32), trt.PluginFieldType.INT32), 140 | trt.PluginField("anchors", np.ascontiguousarray(anchors[i], dtype=np.float32), trt.PluginFieldType.FLOAT32), 141 | trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32), 142 | ])) 143 | ).get_output(0) 144 | 145 | for new_tensor in new_tensors: 146 | network.mark_output(new_tensor) 147 | for old_tensor in old_tensors: 148 | network.unmark_output(old_tensor) 149 | 150 | return network 151 | -------------------------------------------------------------------------------- /yolo/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15.1 2 | onnx==1.4.1 3 | pycuda>=2017.1.1 4 | -------------------------------------------------------------------------------- /yolo/yolo_to_onnx.py: -------------------------------------------------------------------------------- 1 | # yolo_to_onnx.py 2 | # 3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | 52 | from __future__ import print_function 53 | 54 | import os 55 | import sys 56 | import hashlib 57 | import argparse 58 | from collections import OrderedDict 59 | 60 | import numpy as np 61 | import onnx 62 | from onnx import helper, TensorProto 63 | 64 | from plugins import verify_classes, get_input_wh 65 | 66 | 67 | class DarkNetParser(object): 68 | """Definition of a parser for DarkNet-based YOLO model.""" 69 | 70 | def __init__(self, supported_layers): 71 | """Initializes a DarkNetParser object. 72 | 73 | Keyword argument: 74 | supported_layers -- a string list of supported layers in DarkNet naming convention, 75 | parameters are only added to the class dictionary if a parsed layer is included. 76 | """ 77 | 78 | # A list of YOLO layers containing dictionaries with all layer 79 | # parameters: 80 | self.layer_configs = OrderedDict() 81 | self.supported_layers = supported_layers 82 | self.layer_counter = 0 83 | 84 | def parse_cfg_file(self, cfg_file_path): 85 | """Takes the yolov?.cfg file and parses it layer by layer, 86 | appending each layer's parameters as a dictionary to layer_configs. 87 | 88 | Keyword argument: 89 | cfg_file_path 90 | """ 91 | with open(cfg_file_path, 'r') as cfg_file: 92 | remainder = cfg_file.read() 93 | while remainder is not None: 94 | layer_dict, layer_name, remainder = self._next_layer(remainder) 95 | if layer_dict is not None: 96 | self.layer_configs[layer_name] = layer_dict 97 | return self.layer_configs 98 | 99 | def _next_layer(self, remainder): 100 | """Takes in a string and segments it by looking for DarkNet delimiters. 101 | Returns the layer parameters and the remaining string after the last delimiter. 102 | Example for the first Conv layer in yolo.cfg ... 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=32 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | ... 
becomes the following layer_dict return value: 113 | {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32, 114 | 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}. 115 | 116 | '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg 117 | are returned as the next remainder. 118 | 119 | Keyword argument: 120 | remainder -- a string with all raw text after the previously parsed layer 121 | """ 122 | remainder = remainder.split('[', 1) 123 | if len(remainder) == 2: 124 | remainder = remainder[1] 125 | else: 126 | return None, None, None 127 | remainder = remainder.split(']', 1) 128 | if len(remainder) == 2: 129 | layer_type, remainder = remainder 130 | else: 131 | return None, None, None 132 | if remainder.replace(' ', '')[0] == '#': 133 | remainder = remainder.split('\n', 1)[1] 134 | 135 | out = remainder.split('\n\n', 1) 136 | if len(out) == 2: 137 | layer_param_block, remainder = out[0], out[1] 138 | else: 139 | layer_param_block, remainder = out[0], '' 140 | if layer_type == 'yolo': 141 | layer_param_lines = [] 142 | else: 143 | layer_param_lines = layer_param_block.split('\n')[1:] 144 | layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type 145 | layer_dict = dict(type=layer_type) 146 | if layer_type in self.supported_layers: 147 | for param_line in layer_param_lines: 148 | if param_line[0] == '#': 149 | continue 150 | param_type, param_value = self._parse_params(param_line) 151 | layer_dict[param_type] = param_value 152 | self.layer_counter += 1 153 | return layer_dict, layer_name, remainder 154 | 155 | def _parse_params(self, param_line): 156 | """Identifies the parameters contained in one of the cfg file and returns 157 | them in the required format for each parameter type, e.g. as a list, an int or a float. 158 | 159 | Keyword argument: 160 | param_line -- one parsed line within a layer block 161 | """ 162 | param_line = param_line.replace(' ', '') 163 | param_type, param_value_raw = param_line.split('=') 164 | param_value = None 165 | if param_type == 'layers': 166 | layer_indexes = list() 167 | for index in param_value_raw.split(','): 168 | layer_indexes.append(int(index)) 169 | param_value = layer_indexes 170 | elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): 171 | condition_param_value_positive = param_value_raw.isdigit() 172 | condition_param_value_negative = param_value_raw[0] == '-' and \ 173 | param_value_raw[1:].isdigit() 174 | if condition_param_value_positive or condition_param_value_negative: 175 | param_value = int(param_value_raw) 176 | else: 177 | param_value = float(param_value_raw) 178 | else: 179 | param_value = str(param_value_raw) 180 | return param_type, param_value 181 | 182 | 183 | class MajorNodeSpecs(object): 184 | """Helper class used to store the names of ONNX output names, 185 | corresponding to the output of a DarkNet layer and its output channels. 186 | Some DarkNet layers are not created and there is no corresponding ONNX node, 187 | but we still need to track them in order to set up skip connections. 188 | """ 189 | 190 | def __init__(self, name, channels): 191 | """ Initialize a MajorNodeSpecs object. 
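
With the parser defined above, turning a cfg file into the layer_configs dictionary that the rest of this script consumes takes only two calls. A hedged usage sketch: the supported_layers list is the same one main() passes in further below, and yolov4-416.cfg is assumed to exist in the current directory:

```
# Usage sketch for DarkNetParser (illustrative).
supported_layers = ['net', 'convolutional', 'maxpool',
                    'shortcut', 'route', 'upsample', 'yolo']
parser = DarkNetParser(supported_layers)
layer_configs = parser.parse_cfg_file('yolov4-416.cfg')
print(list(layer_configs.keys())[:3])
# e.g. ['000_net', '001_convolutional', '002_convolutional']
print(layer_configs['001_convolutional'])
# e.g. {'type': 'convolutional', 'batch_normalize': 1, 'filters': 32, ...}
```
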
192 | 193 | Keyword arguments: 194 | name -- name of the ONNX node 195 | channels -- number of output channels of this node 196 | """ 197 | self.name = name 198 | self.channels = channels 199 | self.created_onnx_node = False 200 | if name is not None and isinstance(channels, int) and channels > 0: 201 | self.created_onnx_node = True 202 | 203 | 204 | class ConvParams(object): 205 | """Helper class to store the hyper parameters of a Conv layer, 206 | including its prefix name in the ONNX graph and the expected dimensions 207 | of weights for convolution, bias, and batch normalization. 208 | 209 | Additionally acts as a wrapper for generating safe names for all 210 | weights, checking on feasible combinations. 211 | """ 212 | 213 | def __init__(self, node_name, batch_normalize, conv_weight_dims): 214 | """Constructor based on the base node name (e.g. 101_convolutional), the batch 215 | normalization setting, and the convolutional weights shape. 216 | 217 | Keyword arguments: 218 | node_name -- base name of this YOLO convolutional layer 219 | batch_normalize -- bool value if batch normalization is used 220 | conv_weight_dims -- the dimensions of this layer's convolutional weights 221 | """ 222 | self.node_name = node_name 223 | self.batch_normalize = batch_normalize 224 | assert len(conv_weight_dims) == 4 225 | self.conv_weight_dims = conv_weight_dims 226 | 227 | def generate_param_name(self, param_category, suffix): 228 | """Generates a name based on two string inputs, 229 | and checks if the combination is valid.""" 230 | assert suffix 231 | assert param_category in ['bn', 'conv'] 232 | assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias']) 233 | if param_category == 'bn': 234 | assert self.batch_normalize 235 | assert suffix in ['scale', 'bias', 'mean', 'var'] 236 | elif param_category == 'conv': 237 | assert suffix in ['weights', 'bias'] 238 | if suffix == 'bias': 239 | assert not self.batch_normalize 240 | param_name = self.node_name + '_' + param_category + '_' + suffix 241 | return param_name 242 | 243 | class UpsampleParams(object): 244 | #Helper class to store the scale parameter for an Upsample node. 245 | 246 | def __init__(self, node_name, value): 247 | """Constructor based on the base node name (e.g. 86_Upsample), 248 | and the value of the scale input tensor. 249 | 250 | Keyword arguments: 251 | node_name -- base name of this YOLO Upsample layer 252 | value -- the value of the scale input to the Upsample layer as a numpy array 253 | """ 254 | self.node_name = node_name 255 | self.value = value 256 | 257 | def generate_param_name(self): 258 | """Generates the scale parameter name for the Upsample node.""" 259 | param_name = self.node_name + '_' + 'scale' 260 | return param_name 261 | 262 | class WeightLoader(object): 263 | """Helper class used for loading the serialized weights of a binary file stream 264 | and returning the initializers and the input tensors required for populating 265 | the ONNX graph with weights. 266 | """ 267 | 268 | def __init__(self, weights_file_path): 269 | """Initialized with a path to the YOLO .weights file. 270 | 271 | Keyword argument: 272 | weights_file_path -- path to the weights file. 273 | """ 274 | self.weights_file = self._open_weights_file(weights_file_path) 275 | 276 | def load_upsample_scales(self, upsample_params): 277 | """Returns the initializers with the value of the scale input 278 | tensor given by upsample_params. 
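
The naming scheme enforced by generate_param_name() above is what ties the ONNX weight initializers to the Conv and BatchNormalization node inputs created later. A small worked example (the layer name and weight shape are made up for illustration):

```
# Illustrative ConvParams usage: a batch-normalized 3x3 conv with 32 filters
# over a 3-channel input.
cp = ConvParams('001_convolutional', batch_normalize=True,
                conv_weight_dims=(32, 3, 3, 3))
print(cp.generate_param_name('conv', 'weights'))  # 001_convolutional_conv_weights
print(cp.generate_param_name('bn', 'scale'))      # 001_convolutional_bn_scale
# Asking for 'conv'/'bias' would fail the assertion here, because the bias
# is carried by the batch-normalization parameters instead.
```
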
279 | 280 | Keyword argument: 281 | upsample_params -- a UpsampleParams object 282 | """ 283 | initializer = list() 284 | inputs = list() 285 | name = upsample_params.generate_param_name() 286 | shape = upsample_params.value.shape 287 | data = upsample_params.value 288 | scale_init = helper.make_tensor( 289 | name, TensorProto.FLOAT, shape, data) 290 | scale_input = helper.make_tensor_value_info( 291 | name, TensorProto.FLOAT, shape) 292 | initializer.append(scale_init) 293 | inputs.append(scale_input) 294 | return initializer, inputs 295 | 296 | 297 | def load_conv_weights(self, conv_params): 298 | """Returns the initializers with weights from the weights file and 299 | the input tensors of a convolutional layer for all corresponding ONNX nodes. 300 | 301 | Keyword argument: 302 | conv_params -- a ConvParams object 303 | """ 304 | initializer = list() 305 | inputs = list() 306 | if conv_params.batch_normalize: 307 | bias_init, bias_input = self._create_param_tensors( 308 | conv_params, 'bn', 'bias') 309 | bn_scale_init, bn_scale_input = self._create_param_tensors( 310 | conv_params, 'bn', 'scale') 311 | bn_mean_init, bn_mean_input = self._create_param_tensors( 312 | conv_params, 'bn', 'mean') 313 | bn_var_init, bn_var_input = self._create_param_tensors( 314 | conv_params, 'bn', 'var') 315 | initializer.extend( 316 | [bn_scale_init, bias_init, bn_mean_init, bn_var_init]) 317 | inputs.extend([bn_scale_input, bias_input, 318 | bn_mean_input, bn_var_input]) 319 | else: 320 | bias_init, bias_input = self._create_param_tensors( 321 | conv_params, 'conv', 'bias') 322 | initializer.append(bias_init) 323 | inputs.append(bias_input) 324 | conv_init, conv_input = self._create_param_tensors( 325 | conv_params, 'conv', 'weights') 326 | initializer.append(conv_init) 327 | inputs.append(conv_input) 328 | return initializer, inputs 329 | 330 | def _open_weights_file(self, weights_file_path): 331 | """Opens a YOLO DarkNet file stream and skips the header. 332 | 333 | Keyword argument: 334 | weights_file_path -- path to the weights file. 335 | """ 336 | weights_file = open(weights_file_path, 'rb') 337 | length_header = 5 338 | np.ndarray(shape=(length_header, ), dtype='int32', 339 | buffer=weights_file.read(length_header * 4)) 340 | return weights_file 341 | 342 | def _create_param_tensors(self, conv_params, param_category, suffix): 343 | """Creates the initializers with weights from the weights file together with 344 | the input tensors. 345 | 346 | Keyword arguments: 347 | conv_params -- a ConvParams object 348 | param_category -- the category of parameters to be created ('bn' or 'conv') 349 | suffix -- a string determining the sub-type of above param_category (e.g., 350 | 'weights' or 'bias') 351 | """ 352 | param_name, param_data, param_data_shape = self._load_one_param_type( 353 | conv_params, param_category, suffix) 354 | 355 | initializer_tensor = helper.make_tensor( 356 | param_name, TensorProto.FLOAT, param_data_shape, param_data) 357 | input_tensor = helper.make_tensor_value_info( 358 | param_name, TensorProto.FLOAT, param_data_shape) 359 | return initializer_tensor, input_tensor 360 | 361 | def _load_one_param_type(self, conv_params, param_category, suffix): 362 | """Deserializes the weights from a file stream in the DarkNet order. 
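
load_conv_weights() above reads the batch-normalization bias, scale, mean and variance first and the convolution kernel last, which matches the order DarkNet serializes them in. The arithmetic below illustrates how many float32 values one batch-normalized layer consumes from the stream; shapes follow conv_weight_dims as (out, in, kernel_h, kernel_w) and the concrete numbers are only an example:

```
# Worked example: float32 values consumed for one batch-normalized
# 3x3 convolution with 32 filters over 3 input channels.
channels_out, channels_in, k = 32, 3, 3
per_bn_tensor = channels_out                   # bias, scale, mean, var: 32 each
kernel = channels_out * channels_in * k * k    # 32 * 3 * 3 * 3 = 864
total_floats = 4 * per_bn_tensor + kernel      # 992 floats, i.e. 3968 bytes read
print(total_floats)
```
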
363 | 364 | Keyword arguments: 365 | conv_params -- a ConvParams object 366 | param_category -- the category of parameters to be created ('bn' or 'conv') 367 | suffix -- a string determining the sub-type of above param_category (e.g., 368 | 'weights' or 'bias') 369 | """ 370 | param_name = conv_params.generate_param_name(param_category, suffix) 371 | channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims 372 | if param_category == 'bn': 373 | param_shape = [channels_out] 374 | elif param_category == 'conv': 375 | if suffix == 'weights': 376 | param_shape = [channels_out, channels_in, filter_h, filter_w] 377 | #print(param_shape) 378 | elif suffix == 'bias': 379 | param_shape = [channels_out] 380 | param_size = np.product(np.array(param_shape)) 381 | param_data = np.ndarray( 382 | shape=param_shape, 383 | dtype='float32', 384 | buffer=self.weights_file.read(param_size * 4)) 385 | param_data = param_data.flatten().astype(float) 386 | return param_name, param_data, param_shape 387 | 388 | 389 | class GraphBuilderONNX(object): 390 | """Class for creating an ONNX graph from a previously generated list of layer dictionaries.""" 391 | 392 | def __init__(self, model_name, output_tensors): 393 | """Initialize with all DarkNet default parameters used creating 394 | YOLO, and specify the output tensors as an OrderedDict for their 395 | output dimensions with their names as keys. 396 | 397 | Keyword argument: 398 | output_tensors -- the output tensors as an OrderedDict containing the keys' 399 | output dimensions 400 | """ 401 | self.model_name = model_name 402 | self.output_tensors = output_tensors 403 | self._nodes = list() 404 | self.graph_def = None 405 | self.input_tensor = None 406 | self.epsilon_bn = 1e-5 407 | self.momentum_bn = 0.99 408 | self.alpha_lrelu = 0.1 409 | self.param_dict = OrderedDict() 410 | self.major_node_specs = list() 411 | self.batch_size = 1 412 | self.route_spec = 0 # keeping track of the current active 'route' 413 | 414 | def build_onnx_graph( 415 | self, 416 | layer_configs, 417 | weights_file_path, 418 | verbose=True): 419 | """Iterate over all layer configs (parsed from the DarkNet 420 | representation of YOLO), create an ONNX graph, populate it with 421 | weights from the weights file and return the graph definition. 422 | 423 | Keyword arguments: 424 | layer_configs -- an OrderedDict object with all parsed layers' configurations 425 | weights_file_path -- location of the weights file 426 | verbose -- toggles if the graph is printed after creation (default: True) 427 | """ 428 | for layer_name in layer_configs.keys(): 429 | layer_dict = layer_configs[layer_name] 430 | major_node_specs = self._make_onnx_node(layer_name, layer_dict) 431 | if major_node_specs.name is not None: 432 | self.major_node_specs.append(major_node_specs) 433 | # remove dummy 'route' and 'yolo' nodes 434 | self.major_node_specs = [node for node in self.major_node_specs 435 | if 'dummy' not in node.name] 436 | outputs = list() 437 | for tensor_name in self.output_tensors.keys(): 438 | output_dims = [self.batch_size, ] + \ 439 | self.output_tensors[tensor_name] 440 | output_tensor = helper.make_tensor_value_info( 441 | tensor_name, TensorProto.FLOAT, output_dims) 442 | outputs.append(output_tensor) 443 | inputs = [self.input_tensor] 444 | weight_loader = WeightLoader(weights_file_path) 445 | initializer = list() 446 | # If a layer has parameters, add them to the initializer and input lists. 
447 | for layer_name in self.param_dict.keys(): 448 | _, layer_type = layer_name.split('_', 1) 449 | params = self.param_dict[layer_name] 450 | if layer_type == 'convolutional': 451 | #print('%s ' % layer_name, end='') 452 | initializer_layer, inputs_layer = weight_loader.load_conv_weights( 453 | params) 454 | initializer.extend(initializer_layer) 455 | inputs.extend(inputs_layer) 456 | elif layer_type == 'upsample': 457 | initializer_layer, inputs_layer = weight_loader.load_upsample_scales( 458 | params) 459 | initializer.extend(initializer_layer) 460 | inputs.extend(inputs_layer) 461 | del weight_loader 462 | self.graph_def = helper.make_graph( 463 | nodes=self._nodes, 464 | name=self.model_name, 465 | inputs=inputs, 466 | outputs=outputs, 467 | initializer=initializer 468 | ) 469 | if verbose: 470 | print(helper.printable_graph(self.graph_def)) 471 | model_def = helper.make_model(self.graph_def, 472 | producer_name='NVIDIA TensorRT sample') 473 | return model_def 474 | 475 | def _make_onnx_node(self, layer_name, layer_dict): 476 | """Take in a layer parameter dictionary, choose the correct function for 477 | creating an ONNX node and store the information important to graph creation 478 | as a MajorNodeSpec object. 479 | 480 | Keyword arguments: 481 | layer_name -- the layer's name (also the corresponding key in layer_configs) 482 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 483 | """ 484 | layer_type = layer_dict['type'] 485 | if self.input_tensor is None: 486 | if layer_type == 'net': 487 | major_node_output_name, major_node_output_channels = self._make_input_tensor( 488 | layer_name, layer_dict) 489 | major_node_specs = MajorNodeSpecs(major_node_output_name, 490 | major_node_output_channels) 491 | else: 492 | raise ValueError('The first node has to be of type "net".') 493 | else: 494 | node_creators = dict() 495 | node_creators['convolutional'] = self._make_conv_node 496 | node_creators['maxpool'] = self._make_maxpool_node 497 | node_creators['shortcut'] = self._make_shortcut_node 498 | node_creators['route'] = self._make_route_node 499 | node_creators['upsample'] = self._make_upsample_node 500 | node_creators['yolo'] = self._make_yolo_node 501 | 502 | if layer_type in node_creators.keys(): 503 | major_node_output_name, major_node_output_channels = \ 504 | node_creators[layer_type](layer_name, layer_dict) 505 | major_node_specs = MajorNodeSpecs(major_node_output_name, 506 | major_node_output_channels) 507 | else: 508 | print( 509 | 'Layer of type %s not supported, skipping ONNX node generation.' % 510 | layer_type) 511 | major_node_specs = MajorNodeSpecs(layer_name, 512 | None) 513 | return major_node_specs 514 | 515 | def _make_input_tensor(self, layer_name, layer_dict): 516 | """Create an ONNX input tensor from a 'net' layer and store the batch size. 517 | 518 | Keyword arguments: 519 | layer_name -- the layer's name (also the corresponding key in layer_configs) 520 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 521 | """ 522 | #batch_size = layer_dict['batch'] 523 | channels = layer_dict['channels'] 524 | height = layer_dict['height'] 525 | width = layer_dict['width'] 526 | #self.batch_size = batch_size 527 | input_tensor = helper.make_tensor_value_info( 528 | str(layer_name), TensorProto.FLOAT, [ 529 | self.batch_size, channels, height, width]) 530 | self.input_tensor = input_tensor 531 | return layer_name, channels 532 | 533 | def _get_previous_node_specs(self, target_index=0): 534 | """Get a previously ONNX node. 
535 | 536 | Target index can be passed for jumping to a specific index. 537 | 538 | Keyword arguments: 539 | target_index -- optional for jumping to a specific index, 540 | default: 0 for the previous element, while 541 | taking 'route' spec into account 542 | """ 543 | if target_index == 0: 544 | if self.route_spec != 0: 545 | previous_node = self.major_node_specs[self.route_spec] 546 | assert 'dummy' not in previous_node.name 547 | self.route_spec = 0 548 | else: 549 | previous_node = self.major_node_specs[-1] 550 | else: 551 | previous_node = self.major_node_specs[target_index] 552 | assert previous_node.created_onnx_node 553 | return previous_node 554 | 555 | def _make_conv_node(self, layer_name, layer_dict): 556 | """Create an ONNX Conv node with optional batch normalization and 557 | activation nodes. 558 | 559 | Keyword arguments: 560 | layer_name -- the layer's name (also the corresponding key in layer_configs) 561 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 562 | """ 563 | previous_node_specs = self._get_previous_node_specs() 564 | inputs = [previous_node_specs.name] 565 | previous_channels = previous_node_specs.channels 566 | kernel_size = layer_dict['size'] 567 | stride = layer_dict['stride'] 568 | filters = layer_dict['filters'] 569 | batch_normalize = False 570 | if 'batch_normalize' in layer_dict.keys( 571 | ) and layer_dict['batch_normalize'] == 1: 572 | batch_normalize = True 573 | 574 | kernel_shape = [kernel_size, kernel_size] 575 | weights_shape = [filters, previous_channels] + kernel_shape 576 | conv_params = ConvParams(layer_name, batch_normalize, weights_shape) 577 | 578 | strides = [stride, stride] 579 | dilations = [1, 1] 580 | weights_name = conv_params.generate_param_name('conv', 'weights') 581 | inputs.append(weights_name) 582 | if not batch_normalize: 583 | bias_name = conv_params.generate_param_name('conv', 'bias') 584 | inputs.append(bias_name) 585 | 586 | conv_node = helper.make_node( 587 | 'Conv', 588 | inputs=inputs, 589 | outputs=[layer_name], 590 | kernel_shape=kernel_shape, 591 | strides=strides, 592 | auto_pad='SAME_LOWER', 593 | dilations=dilations, 594 | name=layer_name 595 | ) 596 | self._nodes.append(conv_node) 597 | inputs = [layer_name] 598 | layer_name_output = layer_name 599 | 600 | if batch_normalize: 601 | layer_name_bn = layer_name + '_bn' 602 | bn_param_suffixes = ['scale', 'bias', 'mean', 'var'] 603 | for suffix in bn_param_suffixes: 604 | bn_param_name = conv_params.generate_param_name('bn', suffix) 605 | inputs.append(bn_param_name) 606 | batchnorm_node = helper.make_node( 607 | 'BatchNormalization', 608 | inputs=inputs, 609 | outputs=[layer_name_bn], 610 | epsilon=self.epsilon_bn, 611 | momentum=self.momentum_bn, 612 | name=layer_name_bn 613 | ) 614 | self._nodes.append(batchnorm_node) 615 | inputs = [layer_name_bn] 616 | layer_name_output = layer_name_bn 617 | 618 | if layer_dict['activation'] == 'leaky': 619 | layer_name_lrelu = layer_name + '_lrelu' 620 | 621 | lrelu_node = helper.make_node( 622 | 'LeakyRelu', 623 | inputs=inputs, 624 | outputs=[layer_name_lrelu], 625 | name=layer_name_lrelu, 626 | alpha=self.alpha_lrelu 627 | ) 628 | self._nodes.append(lrelu_node) 629 | inputs = [layer_name_lrelu] 630 | layer_name_output = layer_name_lrelu 631 | elif layer_dict['activation'] == 'mish': 632 | layer_name_softplus = layer_name + '_softplus' 633 | layer_name_tanh = layer_name + '_tanh' 634 | layer_name_mish = layer_name + '_mish' 635 | 636 | softplus_node = helper.make_node( 637 | 'Softplus', 638 | 
inputs=inputs, 639 | outputs=[layer_name_softplus], 640 | name=layer_name_softplus, 641 | ) 642 | self._nodes.append(softplus_node) 643 | tanh_node = helper.make_node( 644 | 'Tanh', 645 | inputs=[layer_name_softplus], 646 | outputs=[layer_name_tanh], 647 | name=layer_name_tanh, 648 | ) 649 | self._nodes.append(tanh_node) 650 | 651 | inputs.append(layer_name_tanh) 652 | mish_node = helper.make_node( 653 | 'Mul', 654 | inputs=inputs, 655 | outputs=[layer_name_mish], 656 | name=layer_name_mish, 657 | ) 658 | self._nodes.append(mish_node) 659 | 660 | inputs = [layer_name_mish] 661 | layer_name_output = layer_name_mish 662 | elif layer_dict['activation'] == 'linear': 663 | pass 664 | else: 665 | print('Activation not supported.') 666 | 667 | self.param_dict[layer_name] = conv_params 668 | return layer_name_output, filters 669 | 670 | def _make_shortcut_node(self, layer_name, layer_dict): 671 | """Create an ONNX Add node with the shortcut properties from 672 | the DarkNet-based graph. 673 | 674 | Keyword arguments: 675 | layer_name -- the layer's name (also the corresponding key in layer_configs) 676 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 677 | """ 678 | shortcut_index = layer_dict['from'] 679 | activation = layer_dict['activation'] 680 | assert activation == 'linear' 681 | 682 | first_node_specs = self._get_previous_node_specs() 683 | second_node_specs = self._get_previous_node_specs( 684 | target_index=shortcut_index) 685 | assert first_node_specs.channels == second_node_specs.channels 686 | channels = first_node_specs.channels 687 | inputs = [first_node_specs.name, second_node_specs.name] 688 | shortcut_node = helper.make_node( 689 | 'Add', 690 | inputs=inputs, 691 | outputs=[layer_name], 692 | name=layer_name, 693 | ) 694 | self._nodes.append(shortcut_node) 695 | return layer_name, channels 696 | 697 | def _make_route_node(self, layer_name, layer_dict): 698 | """If the 'layers' parameter from the DarkNet configuration is only one index, continue 699 | node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node 700 | with the route properties from the DarkNet-based graph. 
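
The Softplus, Tanh and Mul chain above is how the DarkNet 'mish' activation is expressed with stock ONNX operators, since mish(x) = x * tanh(softplus(x)). A quick numerical sanity check in NumPy:

```
import numpy as np

# mish(x) = x * tanh(ln(1 + exp(x))), the same function the three ONNX
# nodes above compute end to end.
x = np.linspace(-3.0, 3.0, 7, dtype=np.float32)
mish = x * np.tanh(np.log1p(np.exp(x)))
print(np.round(mish, 4))
```
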
701 | 702 | Keyword arguments: 703 | layer_name -- the layer's name (also the corresponding key in layer_configs) 704 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 705 | """ 706 | route_node_indexes = layer_dict['layers'] 707 | if len(route_node_indexes) == 1: 708 | if 'groups' in layer_dict.keys(): 709 | # for CSPNet-kind of architecture 710 | assert 'group_id' in layer_dict.keys() 711 | groups = layer_dict['groups'] 712 | group_id = int(layer_dict['group_id']) 713 | assert group_id < groups 714 | index = route_node_indexes[0] 715 | if index > 0: 716 | # +1 for input node (same reason as below) 717 | index += 1 718 | route_node_specs = self._get_previous_node_specs( 719 | target_index=index) 720 | assert route_node_specs.channels % groups == 0 721 | channels = route_node_specs.channels // groups 722 | 723 | outputs = [layer_name + '_dummy%d' % i for i in range(groups)] 724 | outputs[group_id] = layer_name 725 | route_node = helper.make_node( 726 | 'Split', 727 | axis=1, 728 | split=[channels] * groups, 729 | inputs=[route_node_specs.name], 730 | outputs=outputs, 731 | name=layer_name, 732 | ) 733 | self._nodes.append(route_node) 734 | else: 735 | if route_node_indexes[0] < 0: 736 | # route should skip self, thus -1 737 | self.route_spec = route_node_indexes[0] - 1 738 | elif route_node_indexes[0] > 0: 739 | # +1 for input node (same reason as below) 740 | self.route_spec = route_node_indexes[0] + 1 741 | # This dummy route node would be removed in the end. 742 | layer_name = layer_name + '_dummy' 743 | channels = 1 744 | else: 745 | assert 'groups' not in layer_dict.keys(), \ 746 | 'groups not implemented for multiple-input route layer!' 747 | inputs = list() 748 | channels = 0 749 | for index in route_node_indexes: 750 | if index > 0: 751 | # Increment by one because we count the input as 752 | # a node (DarkNet does not) 753 | index += 1 754 | route_node_specs = self._get_previous_node_specs( 755 | target_index=index) 756 | inputs.append(route_node_specs.name) 757 | channels += route_node_specs.channels 758 | assert inputs 759 | assert channels > 0 760 | 761 | route_node = helper.make_node( 762 | 'Concat', 763 | axis=1, 764 | inputs=inputs, 765 | outputs=[layer_name], 766 | name=layer_name, 767 | ) 768 | self._nodes.append(route_node) 769 | return layer_name, channels 770 | 771 | def _make_upsample_node(self, layer_name, layer_dict): 772 | """Create an ONNX Upsample node with the properties from 773 | the DarkNet-based graph. 774 | 775 | Keyword arguments: 776 | layer_name -- the layer's name (also the corresponding key in layer_configs) 777 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 778 | """ 779 | upsample_factor = float(layer_dict['stride']) 780 | # Create the scales array with node parameters 781 | scales = np.array([1.0, 1.0, upsample_factor, upsample_factor]).astype(np.float32) 782 | previous_node_specs = self._get_previous_node_specs() 783 | inputs = [previous_node_specs.name] 784 | 785 | channels = previous_node_specs.channels 786 | assert channels > 0 787 | upsample_params = UpsampleParams(layer_name, scales) 788 | scales_name = upsample_params.generate_param_name() 789 | # For ONNX opset >= 9, the Upsample node takes the scales array 790 | # as an input. 
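
The scales array built above is an NCHW multiplier: batch and channel counts stay at 1.0 while height and width are scaled by the layer's stride, so the stride-2 upsample layers in YOLOv3/v4 get [1, 1, 2, 2]. A tiny sketch, where the layer name 086_upsample is only an illustrative example of the zero-padded names DarkNetParser generates:

```
import numpy as np

# Illustrative: the scale input for a stride-2 [upsample] layer.
scales = np.array([1.0, 1.0, 2.0, 2.0]).astype(np.float32)  # [N, C, H, W]
params = UpsampleParams('086_upsample', scales)
print(params.generate_param_name())  # 086_upsample_scale
```
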
791 | inputs.append(scales_name) 792 | 793 | upsample_node = helper.make_node( 794 | 'Upsample', 795 | mode='nearest', 796 | inputs=inputs, 797 | outputs=[layer_name], 798 | name=layer_name, 799 | ) 800 | self._nodes.append(upsample_node) 801 | self.param_dict[layer_name] = upsample_params 802 | return layer_name, channels 803 | 804 | def _make_maxpool_node(self, layer_name, layer_dict): 805 | """Create an ONNX Maxpool node with the properties from 806 | the DarkNet-based graph. 807 | 808 | Keyword arguments: 809 | layer_name -- the layer's name (also the corresponding key in layer_configs) 810 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 811 | """ 812 | stride = layer_dict['stride'] 813 | kernel_size = layer_dict['size'] 814 | previous_node_specs = self._get_previous_node_specs() 815 | inputs = [previous_node_specs.name] 816 | channels = previous_node_specs.channels 817 | kernel_shape = [kernel_size, kernel_size] 818 | strides = [stride, stride] 819 | assert channels > 0 820 | maxpool_node = helper.make_node( 821 | 'MaxPool', 822 | inputs=inputs, 823 | outputs=[layer_name], 824 | kernel_shape=kernel_shape, 825 | strides=strides, 826 | auto_pad='SAME_UPPER', 827 | name=layer_name, 828 | ) 829 | self._nodes.append(maxpool_node) 830 | return layer_name, channels 831 | 832 | def _make_yolo_node(self, layer_name, layer_dict): 833 | """Create an ONNX Yolo node. 834 | 835 | These are dummy nodes which would be removed in the end. 836 | """ 837 | channels = 1 838 | return layer_name + '_dummy', channels 839 | 840 | 841 | def generate_md5_checksum(local_path): 842 | """Returns the MD5 checksum of a local file. 843 | 844 | Keyword argument: 845 | local_path -- path of the file whose checksum shall be generated 846 | """ 847 | with open(local_path, 'rb') as local_file: 848 | data = local_file.read() 849 | return hashlib.md5(data).hexdigest() 850 | 851 | 852 | def main(): 853 | """Run the DarkNet-to-ONNX conversion for YOLO (v3 or v4).""" 854 | if sys.version_info[0] < 3: 855 | raise SystemExit('ERROR: This modified version of yolov3_to_onnx.py ' 856 | 'script is only compatible with python3...') 857 | 858 | parser = argparse.ArgumentParser() 859 | parser.add_argument( 860 | '-c', '--category_num', type=int, default=80, 861 | help='number of object categories [80]') 862 | parser.add_argument( 863 | '-m', '--model', type=str, required=True, 864 | help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-' 865 | '[{dimension}], where dimension could be a single ' 866 | 'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)')) 867 | args = parser.parse_args() 868 | if args.category_num <= 0: 869 | raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) 870 | 871 | cfg_file_path = '%s.cfg' % args.model 872 | if not os.path.isfile(cfg_file_path): 873 | raise SystemExit('ERROR: file (%s) not found!' % cfg_file_path) 874 | weights_file_path = '%s.weights' % args.model 875 | if not os.path.isfile(weights_file_path): 876 | raise SystemExit('ERROR: file (%s) not found!' % weights_file_path) 877 | output_file_path = '%s.onnx' % args.model 878 | 879 | if not verify_classes(args.model, args.category_num): 880 | raise SystemExit('ERROR: bad category_num (%d)' % args.category_num) 881 | 882 | # Derive yolo input width/height from model name. 883 | # For example, "yolov4-416x256" -> width=416, height=256 884 | w, h = get_input_wh(args.model) 885 | 886 | # These are the only layers DarkNetParser will extract parameters 887 | # from. 
The three layers of type 'yolo' are not parsed in detail 888 | # because they are included in the post-processing later. 889 | supported_layers = ['net', 'convolutional', 'maxpool', 890 | 'shortcut', 'route', 'upsample', 'yolo'] 891 | 892 | # Create a DarkNetParser object, and the use it to generate an 893 | # OrderedDict with all layer's configs from the cfg file. 894 | print('Parsing DarkNet cfg file...') 895 | parser = DarkNetParser(supported_layers) 896 | layer_configs = parser.parse_cfg_file(cfg_file_path) 897 | 898 | # We do not need the parser anymore after we got layer_configs. 899 | del parser 900 | 901 | # In above layer_config, there are three outputs that we need to 902 | # know the output shape of (in CHW format). 903 | output_tensor_dims = OrderedDict() 904 | c = (args.category_num + 5) * 3 905 | if 'yolov3' in args.model: 906 | if 'tiny' in args.model: 907 | output_tensor_dims['016_convolutional'] = [c, h // 32, w // 32] 908 | output_tensor_dims['023_convolutional'] = [c, h // 16, w // 16] 909 | elif 'spp' in args.model: 910 | output_tensor_dims['089_convolutional'] = [c, h // 32, w // 32] 911 | output_tensor_dims['101_convolutional'] = [c, h // 16, w // 16] 912 | output_tensor_dims['113_convolutional'] = [c, h // 8, w // 8] 913 | else: 914 | output_tensor_dims['082_convolutional'] = [c, h // 32, w // 32] 915 | output_tensor_dims['094_convolutional'] = [c, h // 16, w // 16] 916 | output_tensor_dims['106_convolutional'] = [c, h // 8, w // 8] 917 | elif 'yolov4' in args.model: 918 | if 'tiny' in args.model: 919 | output_tensor_dims['030_convolutional'] = [c, h // 32, w // 32] 920 | output_tensor_dims['037_convolutional'] = [c, h // 16, w // 16] 921 | else: 922 | output_tensor_dims['139_convolutional'] = [c, h // 8, w // 8] 923 | output_tensor_dims['150_convolutional'] = [c, h // 16, w // 16] 924 | output_tensor_dims['161_convolutional'] = [c, h // 32, w // 32] 925 | else: 926 | raise SystemExit('ERROR: unknown model (%s)!' % args.model) 927 | 928 | # Create a GraphBuilderONNX object with the specified output tensor 929 | # dimensions. 930 | print('Building ONNX graph...') 931 | builder = GraphBuilderONNX(args.model, output_tensor_dims) 932 | 933 | # Now generate an ONNX graph with weights from the previously parsed 934 | # layer configurations and the weights file. 935 | yolo_model_def = builder.build_onnx_graph( 936 | layer_configs=layer_configs, 937 | weights_file_path=weights_file_path, 938 | verbose=True) 939 | 940 | # Once we have the model definition, we do not need the builder anymore. 941 | del builder 942 | 943 | # Perform a sanity check on the ONNX model definition. 944 | print('Checking ONNX model...') 945 | onnx.checker.check_model(yolo_model_def) 946 | 947 | # Serialize the generated ONNX graph to this file. 948 | print('Saving ONNX file...') 949 | onnx.save(yolo_model_def, output_file_path) 950 | 951 | print('Done.') 952 | 953 | 954 | if __name__ == '__main__': 955 | main() 956 | --------------------------------------------------------------------------------
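
For reference, the output tensor dimensions that main() registers above work out as follows for the default COCO setting (category_num=80) and a yolov4-416 model; the channel count is 3 anchors times (80 class scores + 4 box offsets + 1 objectness):

```
# Worked example of the output_tensor_dims arithmetic above
# (yolov4-416 with 80 categories).
category_num = 80
c = (category_num + 5) * 3   # 255
h = w = 416
print('139_convolutional:', [c, h // 8,  w // 8])   # [255, 52, 52]
print('150_convolutional:', [c, h // 16, w // 16])  # [255, 26, 26]
print('161_convolutional:', [c, h // 32, w // 32])  # [255, 13, 13]
```
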