├── .gitignore ├── CMakeLists.txt ├── README.md ├── configs ├── bdd100k.names ├── lightNet-BDD100K-1280x960-chPruning.cfg ├── lightNet-BDD100K-1280x960-chPruning.txt ├── lightNet-BDD100K-1280x960.cfg ├── lightNet-BDD100K-1280x960.txt ├── lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg ├── lightNet-BDD100K-chPruning-det-semaseg-1280x960.txt ├── lightNet-BDD100K-det-semaseg-1280x960.cfg ├── lightNet-BDD100K-det-semaseg-1280x960.txt ├── yolov8x-BDD100K-640x640-T4-lane4cls.cfg └── yolov8x-BDD100K-640x640-T4-lane4cls.txt ├── extra ├── API.h └── class_timer.hpp ├── launch └── lightnet_trt.launch.xml ├── modules ├── calibrator.cpp ├── calibrator.h ├── chunk.cu ├── chunk.h ├── class_detector.cpp ├── class_detector.h ├── class_yolo_detector.hpp ├── colormap.hpp ├── detect.cu ├── detect.h ├── ds_image.cpp ├── ds_image.h ├── err.log ├── hardswish.cu ├── hardswish.h ├── kernel.cu ├── logging.h ├── mish.cu ├── mish.h ├── plugin_factory.cpp ├── plugin_factory.h ├── preprocess.cu ├── preprocess.h ├── profiler.cpp ├── profiler.h ├── trt_utils.cpp ├── trt_utils.h ├── yolo.cpp ├── yolo.h ├── yolo_config_parser.cpp ├── yolo_config_parser.h ├── yoloplugin_lib.cpp ├── yoloplugin_lib.h ├── yolov2.cpp ├── yolov2.h ├── yolov3.cpp ├── yolov3.h ├── yolov4.cpp ├── yolov4.h ├── yolov5.cpp └── yolov5.h ├── package.xml ├── samples ├── parallel_executor.cpp └── sample_detector.cpp └── src ├── lightnet_trt_core.cpp ├── lightnet_trt_core.hpp ├── lightnet_trt_node.cpp ├── lightnet_trt_node.hpp ├── main.cpp ├── utils.cpp └── utils.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.dSYM 4 | *.csv 5 | *.out 6 | *.png 7 | *.so 8 | *.exe 9 | *.dll 10 | *.lib 11 | *.dylib 12 | *.pyc 13 | *~ 14 | mnist/ 15 | data/ 16 | caffe/ 17 | grasp/ 18 | images/ 19 | opencv/ 20 | convnet/ 21 | decaf/ 22 | submission/ 23 | cfg/ 24 | temp/ 25 | build/darknet/* 26 | build_*/ 27 | ninja/ 28 | ninja.zip 29 | vcpkg_installed/ 30 | !build/darknet/YoloWrapper.cs 31 | .fuse* 32 | *.weights 33 | *.db 34 | *.engine 35 | *calibration.table 36 | build/* 37 | build/*.cmake 38 | build/*.ninja 39 | build/*.txt 40 | build/*.json 41 | build/CMakeFiles/ 42 | build/detect_cuda_compute_capabilities.cu 43 | build/.ninja_deps 44 | build/.ninja_log 45 | build/Makefile 46 | */vcpkg-manifest-install.log 47 | build.log 48 | __pycache__/ 49 | 50 | # OS Generated # 51 | .DS_Store* 52 | ehthumbs.db 53 | Icon? 54 | Thumbs.db 55 | *.swp 56 | 57 | # IDE generated # 58 | .vs/ 59 | .vscode/ 60 | 61 | # Managed by CMake 62 | src/version.h 63 | 64 | # Build artifacts 65 | lib/ 66 | share/ 67 | include/darknet/ 68 | uselib 69 | uselib_track 70 | darknet 71 | vcpkg/ 72 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # cmake_minimum_required(VERSION 3.1) 2 | cmake_minimum_required(VERSION 3.14) 3 | project(lightnet_trt VERSION 1.0) 4 | 5 | enable_language(CUDA) # not supported on CMAKE 3.14 6 | 7 | find_package(OpenMP REQUIRED) 8 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") # Added for CMAKE 3.14 9 | set(CMAKE_CXX_COMPILER "/usr/bin/g++") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3 -Wno-unused-function -Wno-maybe-uninitialized -Wno-narrowing -Wno-misleading-indentation -Wno-unused-but-set-variable -Wno-unused-variable -Wno-return-type -Wno-deprecated-declarations -Wno-write-strings -Wno-reorder -Wno-sign-compare -Wno-unused-parameter -Wno-unused-value -Wno-uninitialized ${OpenMP_CXX_FLAGS} -fopenmp") 11 | #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O0 -g -Wno-write-strings") 12 | #set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb -g") 13 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,$ORIGIN") 14 | 15 | include_directories(/usr/local/cuda/include) 16 | find_package(CUDA REQUIRED) 17 | 18 | find_package(OpenCV REQUIRED) 19 | 20 | file(GLOB_RECURSE sources modules/*.hpp modules/*.cpp modules/*.h modules/*.cu extra/*.h module/preprocess.cu) 21 | add_library(detector SHARED ${sources}) 22 | 23 | target_include_directories(detector PRIVATE extra/ modules/ ${OpenCV_INCLUDE_DIRS} ${CUDA_TOOLKIT_ROOT_DIR}/include) 24 | target_link_libraries(detector ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers gflags "stdc++fs") 25 | 26 | add_executable(lightnet_trt samples/sample_detector.cpp ${sources}) 27 | 28 | target_include_directories(lightnet_trt PRIVATE modules/ extra/) 29 | target_link_libraries(lightnet_trt ${CUDA_LIBRARIES} ${OpenCV_LIBS} nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++") 30 | 31 | 32 | # ROS 2 package 33 | if(BUILD_ROS2_LIGHTNET_TRT) 34 | find_package(autoware_cmake REQUIRED) 35 | autoware_package() 36 | 37 | find_package(Eigen3 REQUIRED) 38 | 39 | # build nodelet 40 | # TODO 41 | 42 | # build node 43 | ament_auto_add_executable(lightnet_trt_node 44 | src/main.cpp 45 | src/lightnet_trt_node.cpp 46 | src/lightnet_trt_core.cpp 47 | src/utils.cpp 48 | ) 49 | ament_target_dependencies(lightnet_trt_node) 50 | target_include_directories(lightnet_trt_node PRIVATE modules/ extra/) 51 | target_link_libraries(lightnet_trt_node detector) 52 | target_link_libraries(lightnet_trt_node ${CUDA_LIBRARIES} ${OpenCV_LIBS} Eigen3::Eigen nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++") 53 | 54 | ament_auto_package(INSTALL_TO_SHARE 55 | launch 56 | configs 57 | ) 58 | endif() 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LightNet-TRT: High-Efficiency and Real-Time CNN Implementation on Edge AI 2 | 3 | LightNet-TRT is a CNN implementation optimized for edge AI devices that combines the advantages of LightNet [[1]](#references) and TensorRT [[2]](#references). LightNet is a lightweight and high-performance neural network framework designed for edge devices, while TensorRT is a high-performance deep learning inference engine developed by NVIDIA for optimizing and running deep learning models on GPUs. LightNet-TRT uses the Network Definition API provided by TensorRT to integrate LightNet into TensorRT, allowing it to run efficiently and in real-time on edge devices. 4 | 5 | ![lightNet-detection-1024x256](https://user-images.githubusercontent.com/43815838/227718872-b9dfdb7b-4d3c-45ce-b1d9-585a99380434.gif) 6 | 7 | 8 | ## Key Improvements 9 | 10 | ### 2:4 Structured Sparsity 11 | 12 | LightNet-TRT utilizes 2:4 structured sparsity [[3]](#references) to further optimize the network. 2:4 structured sparsity means that two values must be zero in each contiguous block of four values, resulting in a 50% reduction in the number of weights. This technique allows the network to use fewer weights and computations while maintaining accuracy. 13 | 14 | ![Sparsity](https://developer-blogs.nvidia.com/ja-jp/wp-content/uploads/sites/6/2022/06/2-4-structured-sparse-matrix.png "sparsity") 15 | 16 | ### NVDLA Execution 17 | 18 | LightNet-TRT also supports the execution of the neural network on the NVIDIA Deep Learning Accelerator (NVDLA) [[4]](#references) , a free and open architecture that provides high performance and low power consumption for deep learning inference on edge devices. By using NVDLA, LightNet-TRT can further improve the efficiency and performance of the network on edge devices. 19 | 20 | ![NVDLA](https://i0.wp.com/techgrabyte.com/wp-content/uploads/2019/09/Nvidia-Open-Source-Its-Deep-Learning-Inference-Compiler-NVDLA-2.png?w=768&ssl=1 "NVDLA") 21 | 22 | 23 | ### Multi-Precision Quantization 24 | 25 | In addition to post training quantization [[5]](#references), LightNet-TRT also supports multi-precision quantization, which allows the network to use different precision for weights and activations. By using mixed precision, LightNet-TRT can further reduce the memory usage and computational requirements of the network while still maintaining accuracy. By writing it in CFG, you can set the precision for each layer of your CNN. 26 | 27 | ![Quantization](https://developer-blogs.nvidia.com/wp-content/uploads/2021/07/qat-training-precision.png "Quantization") 28 | 29 | 30 | 31 | ### Multitask Execution (Detection/Segmentation) 32 | 33 | LightNet-TRT also supports multitask execution, allowing the network to perform both object detection and segmentation tasks simultaneously. This enables the network to perform multiple tasks efficiently on edge devices, saving computational resources and power. 34 | 35 | [![](https://img.youtube.com/vi/TmlW-b_t3sQ/0.jpg)](https://www.youtube.com/watch?v=TmlW-b_t3sQ) 36 | 37 | ## Installation 38 | 39 | ### Requirements 40 | 41 | - CUDA 11.0 or higher 42 | - TensorRT 8.0 or higher 43 | - OpenCV 3.0 or higher 44 | 45 | ### Steps 46 | 47 | 1. Clone the repository. 48 | 49 | ```shell 50 | $ git clone https://github.com/daniel89710/lightNet-TRT.git 51 | $ cd lightNet-TRT 52 | ``` 53 | 54 | 2. Install libraries. 55 | 56 | ```shell 57 | $ sudo apt update 58 | $ sudo apt install libgflags-dev 59 | $ sudo apt install libboost-all-dev 60 | ``` 61 | 62 | 3. Compile the TensorRT implementation. 63 | 64 | ```shell 65 | $ mkdir build 66 | $ cmake ../ 67 | $ make -j 68 | ``` 69 | 70 | ## Model 71 | | Model | Resolutions | GFLOPS | Params | Precision | Sparsity | DNN time on RTX3080 | DNN time on Jetson Orin NX 16GB GPU | DNN time on Jetson Orin NX 16GB DLA| DNN time on Jetson Orin Nano 4GB GPU | cfg | weights | 72 | |---|---|---|---|---|---|---|---|---|---|---|---| 73 | | lightNet | 1280x960 | 58.01 | 9.0M | int8 | 49.8% | 1.30ms | 7.6ms | 14.2ms | 14.9ms | [github](https://github.com/daniel89710/lightNet/blob/master/cfg/lightNet-BDD100K-1280x960.cfg) |[GoogleDrive](https://drive.google.com/file/d/1qTBQ0BkIYqcyu1BwC54_Z9T1_b702HKf/view?usp=sharing) | 74 | | LightNet+Semseg | 1280x960 | 76.61 | 9.7M | int8 | 49.8% | 2.06ms | 15.3ms | 23.2ms | 26.2ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-det-semaseg-1280x960.cfg) | [GoogleDrive](https://drive.google.com/file/d/1ttdVtlDiPun13EQCB4Nyls3Q8w5aXg1i/view?usp=sharing)| 75 | | lightNet pruning | 1280x960 | 35.03 | 4.3M | int8 | 49.8% | 1.21ms | 8.89ms | 11.68ms | 13.75ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-1280x960-chPruning.cfg) |[GoogleDrive](https://drive.google.com/file/d/1OGZApPeNH7K08-89eJ8tGhzA9kkRFLii/view?usp=sharing) | 76 | | LightNet pruning +SemsegLight | 1280x960 | 44.35 | 4.9M | int8 | 49.8% | 1.80ms | 9.89ms | 15.26ms | 23.35ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg) | [GoogleDrive](https://drive.google.com/file/d/1dytYnqS4h_5YK73tr6DOZTUYUEKZYsFe/view?usp=drive_link)| 77 | 78 | * "DNN time" refers to the time measured by IProfiler during the enqueueV2 operation, excluding pre-process and post-process times. 79 | * Orin NX has three independent AI processors, allowing lightNet to be parallelized across a GPU and two DLAs. 80 | * Orin Nano 4GB has only iGPU with 512 CUDA cores. 81 | ## Usage 82 | 83 | ### Converting a LightNet model to a TensorRT engine 84 | 85 | Build FP32 engine 86 | ```shell 87 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kFLOAT 88 | ``` 89 | 90 | Build FP16(HALF) engine 91 | ```shell 92 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kHALF 93 | ``` 94 | 95 | Build INT8 engine 96 | (You need to prepare a list for calibration in "configs/calibration_images.txt".) 97 | ```shell 98 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kINT8 99 | ``` 100 | 101 | Build DLA engine (Supported by only Xavier and Orin) 102 | ```shell 103 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kINT8 --dla [0/1] 104 | ``` 105 | 106 | ### Inference with the TensorRT engine 107 | 108 | Inference from images 109 | ```shell 110 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision [kFLOAT/kHALF/kINT8] {--dla [0/1]} --d DIRECTORY 111 | ``` 112 | 113 | Inference from images 114 | ```shell 115 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision [kFLOAT/kHALF/kINT8] {--dla [0/1]} --d VIDEO 116 | ``` 117 | 118 | ### Inference with ROS 2 node 119 | 120 | You can build as follows. Note that current implementation requires Autoware installed and sourced. 121 | 122 | ```bash 123 | mkdir lightnet_trt_ws/src -p 124 | cd lightnet_trt_ws/src 125 | git clone https://github.com/kminoda/lightNet-TRT-ROS2.git 126 | cd .. 127 | colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release -DBUILD_ROS2_LIGHTNET_TRT=ON 128 | ``` 129 | 130 | Then, you can run the node as follows. 131 | 132 | ```bash 133 | ros2 launch lightnet_trt lightnet_trt.launch.xml 134 | ``` 135 | 136 | ## Implementation 137 | 138 | LightNet-TRT is built on the LightNet framework and integrates with TensorRT using the Network Definition API. The implementation is based on the following repositories: 139 | 140 | - LightNet: [https://github.com/daniel89710/lightNet](https://github.com/daniel89710/lightNet) 141 | - TensorRT: [https://github.com/NVIDIA/TensorRT](https://github.com/NVIDIA/TensorRT) 142 | - NVIDIA DeepStream SDK: [https://github.com/NVIDIA-AI-IOT/deepstream\_reference\_apps/tree/restructure](https://github.com/NVIDIA-AI-IOT/deepstream_reference_apps/tree/restructure) 143 | - YOLO-TensorRT: [https://github.com/enazoe/yolo-tensorrt](https://github.com/enazoe/yolo-tensorrt) 144 | 145 | ## Conclusion 146 | 147 | LightNet-TRT is a powerful and efficient implementation of CNNs using Edge AI. With its advanced features and integration with TensorRT, it is an excellent choice for real-time object detection and semantic segmentation applications on edge devices. 148 | 149 | # References 150 | [1]. [LightNet](https://github.com/daniel89710/lightNet) 151 | [2]. [TensorRT](https://developer.nvidia.com/tensorrt) 152 | [3]. [Accelerating Inference with Sparsity Using the NVIDIA Ampere Architecture and NVIDIA TensorRT](https://developer.nvidia.com/blog/accelerating-inference-with-sparsity-using-ampere-and-tensorrt/) 153 | [4]. [NVDLA](http://nvdla.org/) 154 | [5]. [Achieving FP32 Accuracy for INT8 Inference Using Quantization Aware Training with NVIDIA TensorRT](https://developer.nvidia.com/blog/achieving-fp32-accuracy-for-int8-inference-using-quantization-aware-training-with-tensorrt/) 155 | 156 | 157 | -------------------------------------------------------------------------------- /configs/bdd100k.names: -------------------------------------------------------------------------------- 1 | car 2 | bus 3 | person 4 | bike 5 | truck 6 | motor 7 | train 8 | rider 9 | traffic_sign 10 | traffic_light -------------------------------------------------------------------------------- /configs/lightNet-BDD100K-1280x960-chPruning.txt: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # config_file_path : Darknet cfg file 24 | # wts_file_path : Darknet weights file 25 | 26 | --network_type=YOLOV3 27 | --config_file_path=../configs/lightNet-BDD100K-1280x960-chPruning.cfg 28 | --wts_file_path=../configs/lightNet-BDD100K-1280x960-chPruning.weights 29 | 30 | #Support kFLOAT/kHALF/kINT8 31 | #kINT8 requires 'calibration_images.txt' 32 | --precision=kFLOAT 33 | 34 | #Support DLA for Xavier/Orin series 35 | #--dla=[0/1] 36 | --mp=true 37 | -------------------------------------------------------------------------------- /configs/lightNet-BDD100K-1280x960.txt: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # config_file_path : Darknet cfg file 24 | # wts_file_path : Darknet weights file 25 | 26 | --network_type=YOLOV3 27 | --config_file_path=../configs/lightNet-BDD100K-1280x960.cfg 28 | --wts_file_path=../configs/lightNet-BDD100K-1280x960.weights 29 | 30 | #Support kFLOAT/kHALF/kINT8 31 | --precision=kFLOAT 32 | 33 | #Support DLA for Xavier/Orin series 34 | #--dla=[0/1] 35 | --mp=true 36 | -------------------------------------------------------------------------------- /configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.txt: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ### This is a sample config file for yolo plugin and the trt-yolo-app 24 | # Statements starting with '#' are ignored. 25 | 26 | # The following 4 config params are required for yolo plugin and trt-yolo-app 27 | 28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny 29 | # config_file_path : Darknet cfg file 30 | # wts_file_path : Darknet weights file 31 | # labels_file_path : Text file with a list of object class labels 32 | 33 | --network_type=YOLOV3 34 | --config_file_path=../configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg 35 | --wts_file_path=../configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.weights 36 | 37 | #Support kFLOAT/kHALF/kINT8 38 | --precision=kFLOAT 39 | 40 | #Support DLA for Xavier/Orin series 41 | #--dla=[0/1] 42 | #--mp=true 43 | -------------------------------------------------------------------------------- /configs/lightNet-BDD100K-det-semaseg-1280x960.txt: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ### This is a sample config file for yolo plugin and the trt-yolo-app 24 | # Statements starting with '#' are ignored. 25 | 26 | # The following 4 config params are required for yolo plugin and trt-yolo-app 27 | 28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny 29 | # config_file_path : Darknet cfg file 30 | # wts_file_path : Darknet weights file 31 | # labels_file_path : Text file with a list of object class labels 32 | 33 | --network_type=YOLOV3 34 | --config_file_path=../configs/lightNet-BDD100K-det-semaseg-1280x960.cfg 35 | --wts_file_path=../configs/lightNet-BDD100K-det-semaseg-1280x960.weights 36 | 37 | #Support kFLOAT/kHALF/kINT8 38 | --precision=kFLOAT 39 | 40 | #Support DLA for Xavier/Orin series 41 | #--dla=[0/1] 42 | #--mp=true 43 | -------------------------------------------------------------------------------- /configs/yolov8x-BDD100K-640x640-T4-lane4cls.txt: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ### This is a sample config file for yolo plugin and the trt-yolo-app 24 | # Statements starting with '#' are ignored. 25 | 26 | # The following 4 config params are required for yolo plugin and trt-yolo-app 27 | 28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny 29 | # config_file_path : Darknet cfg file 30 | # wts_file_path : Darknet weights file 31 | # labels_file_path : Text file with a list of object class labels 32 | 33 | --network_type=YOLOV8X 34 | --config_file_path=../configs/yolov8x-BDD100K-640x640-T4-lane4cls.cfg 35 | --wts_file_path=../weights/yolov8x-BDD100K-640x640-T4-lane4cls-scratch-pseudo-finetune_last.weights 36 | 37 | #Support kFLOAT/kHALF/kINT8 38 | --precision=kFLOAT 39 | 40 | #Support DLA for Xavier/Orin series 41 | #--dla=[0/1] 42 | #--mp=true 43 | -------------------------------------------------------------------------------- /extra/API.h: -------------------------------------------------------------------------------- 1 | #ifdef API_EXPORTS 2 | 3 | #if defined(_MSC_VER) 4 | #define API __declspec(dllexport) 5 | #else 6 | #define API __attribute__((visibility("default"))) 7 | #endif 8 | 9 | #else 10 | 11 | #if defined(_MSC_VER) 12 | #define API __declspec(dllimport) 13 | #else 14 | #define API 15 | #endif 16 | 17 | #endif -------------------------------------------------------------------------------- /extra/class_timer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class Timer 8 | { 9 | public: 10 | Timer() : beg_(clock_::now()) 11 | {} 12 | void reset() 13 | { 14 | beg_ = clock_::now(); 15 | } 16 | 17 | double elapsed() const 18 | { 19 | return std::chrono::duration_cast(clock_::now() - beg_).count(); 20 | } 21 | 22 | void out(std::string message = "") 23 | { 24 | double t = elapsed(); 25 | std::cout << message << " elasped time:" << t << "ms" << std::endl; 26 | reset(); 27 | } 28 | 29 | double get_duration()const 30 | { 31 | return elapsed(); 32 | } 33 | private: 34 | using clock_ = std::chrono::high_resolution_clock; 35 | using second_ = std::chrono::duration; 36 | std::chrono::time_point beg_; 37 | }; 38 | -------------------------------------------------------------------------------- /launch/lightnet_trt.launch.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/calibrator.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #include "calibrator.h" 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, 33 | const std::string& calibImagesPath, 34 | const std::string& calibTableFilePath, 35 | const uint64_t& inputSize, const uint32_t& inputH, 36 | const uint32_t& inputW, const std::string& inputBlobName, 37 | const std::string &s_net_type_) : 38 | m_BatchSize(batchSize), 39 | m_InputH(inputH), 40 | m_InputW(inputW), 41 | m_InputSize(inputSize), 42 | m_InputCount(batchSize * inputSize), 43 | m_InputBlobName(inputBlobName), 44 | m_CalibTableFilePath(calibTableFilePath), 45 | m_ImageIndex(0), 46 | _s_net_type(s_net_type_) 47 | { 48 | if (!fileExists(m_CalibTableFilePath, false)) 49 | { 50 | m_ImageList = loadImageList(calibImages, calibImagesPath); 51 | m_ImageList.resize(static_cast(m_ImageList.size() / m_BatchSize) * m_BatchSize); 52 | std::random_shuffle(m_ImageList.begin(), m_ImageList.end(), 53 | [](int i) { return rand() % i; }); 54 | } 55 | 56 | NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float))); 57 | } 58 | 59 | Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); } 60 | 61 | bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)noexcept 62 | { 63 | if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false; 64 | 65 | // Load next batch 66 | std::vector dsImages(m_BatchSize); 67 | for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j) 68 | { 69 | dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW); 70 | } 71 | m_ImageIndex += m_BatchSize; 72 | 73 | cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW); 74 | 75 | NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr(0), m_InputCount * sizeof(float), 76 | cudaMemcpyHostToDevice)); 77 | assert(!strcmp(names[0], m_InputBlobName.c_str())); 78 | bindings[0] = m_DeviceInput; 79 | return true; 80 | } 81 | 82 | const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) noexcept 83 | { 84 | void* output; 85 | m_CalibrationCache.clear(); 86 | assert(!m_CalibTableFilePath.empty()); 87 | std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in); 88 | input >> std::noskipws; 89 | if (m_ReadCache && input.good()) 90 | std::copy(std::istream_iterator(input), std::istream_iterator(), 91 | std::back_inserter(m_CalibrationCache)); 92 | 93 | length = m_CalibrationCache.size(); 94 | if (length) 95 | { 96 | std::cout << "Using cached calibration table to build the engine" << std::endl; 97 | output = &m_CalibrationCache[0]; 98 | } 99 | 100 | else 101 | { 102 | std::cout << "New calibration table will be created to build the engine" << std::endl; 103 | output = nullptr; 104 | } 105 | 106 | return output; 107 | } 108 | 109 | void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) noexcept 110 | { 111 | assert(!m_CalibTableFilePath.empty()); 112 | std::ofstream output(m_CalibTableFilePath, std::ios::binary); 113 | output.write(reinterpret_cast(cache), length); 114 | output.close(); 115 | } -------------------------------------------------------------------------------- /modules/calibrator.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | #ifndef _CALIBRATOR_H_ 26 | #define _CALIBRATOR_H_ 27 | 28 | #include "NvInfer.h" 29 | #include "ds_image.h" 30 | #include "trt_utils.h" 31 | 32 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 33 | { 34 | public: 35 | Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, 36 | const std::string& calibImagesPath, const std::string& calibTableFilePath, 37 | const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, 38 | const std::string& inputBlobName,const std::string &s_net_type_); 39 | virtual ~Int8EntropyCalibrator(); 40 | 41 | int getBatchSize() const noexcept override { return m_BatchSize; } 42 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 43 | const void* readCalibrationCache(size_t& length) noexcept override; 44 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 45 | 46 | private: 47 | const uint32_t m_BatchSize; 48 | const uint32_t m_InputH; 49 | const uint32_t m_InputW; 50 | const uint64_t m_InputSize; 51 | const uint64_t m_InputCount; 52 | const std::string m_InputBlobName; 53 | const std::string _s_net_type; 54 | const std::string m_CalibTableFilePath{nullptr}; 55 | uint32_t m_ImageIndex; 56 | bool m_ReadCache{true}; 57 | void* m_DeviceInput{nullptr}; 58 | std::vector m_ImageList; 59 | std::vector m_CalibrationCache; 60 | }; 61 | 62 | #endif -------------------------------------------------------------------------------- /modules/chunk.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "chunk.h" 6 | #include 7 | #define NV_CUDA_CHECK(status) \ 8 | { \ 9 | if (status != 0) \ 10 | { \ 11 | std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ 12 | << " at line " << __LINE__ << std::endl; \ 13 | abort(); \ 14 | } \ 15 | } 16 | 17 | 18 | namespace nvinfer1 19 | { 20 | Chunk::Chunk() 21 | { 22 | 23 | } 24 | Chunk::Chunk(const void* buffer, size_t size) 25 | { 26 | assert(size == sizeof(_n_size_split)); 27 | _n_size_split = *reinterpret_cast(buffer); 28 | } 29 | Chunk::~Chunk() 30 | { 31 | 32 | } 33 | int Chunk::getNbOutputs() const noexcept 34 | { 35 | return 2; 36 | } 37 | 38 | Dims Chunk::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept 39 | { 40 | assert(nbInputDims == 1); 41 | assert(index == 0 || index == 1); 42 | return Dims3(inputs[0].d[0] / 2, inputs[0].d[1], inputs[0].d[2]); 43 | } 44 | 45 | int Chunk::initialize() noexcept 46 | { 47 | return 0; 48 | } 49 | 50 | void Chunk::terminate() noexcept 51 | { 52 | } 53 | 54 | size_t Chunk::getWorkspaceSize(int maxBatchSize) const noexcept 55 | { 56 | return 0; 57 | } 58 | 59 | /*int Chunk::enqueue(int batchSize, 60 | const void* const* inputs, 61 | void** outputs, 62 | void* workspace, 63 | cudaStream_t stream)noexcept*/ 64 | 65 | int Chunk::enqueue(int batchSize, 66 | const void* const* inputs, 67 | void* const* outputs, 68 | void* workspace, 69 | cudaStream_t stream) noexcept 70 | { 71 | //batch 72 | for (int b = 0; b < batchSize; ++b) 73 | { 74 | printf("Chunk %d, %d\n", _n_size_split, _n_size_split/sizeof(float)); 75 | /* 76 | NV_CUDA_CHECK(cudaMemcpyAsync((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice, stream)); 77 | NV_CUDA_CHECK(cudaMemcpyAsync((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice, stream)); 78 | */ 79 | NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice)); 80 | NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice)); 81 | } 82 | // NV_CUDA_CHECK(cudaMemcpy(outputs[0], inputs[0], _n_size_split, cudaMemcpyDeviceToDevice)); 83 | // NV_CUDA_CHECK(cudaMemcpy(outputs[1], (void*)((char*)inputs[0] + _n_size_split), _n_size_split, cudaMemcpyDeviceToDevice)); 84 | return 0; 85 | } 86 | 87 | size_t Chunk::getSerializationSize() const noexcept 88 | { 89 | return sizeof(_n_size_split); 90 | } 91 | 92 | void Chunk::serialize(void *buffer)const noexcept 93 | { 94 | *reinterpret_cast(buffer) = _n_size_split; 95 | } 96 | 97 | const char* Chunk::getPluginType()const noexcept 98 | { 99 | return "CHUNK_TRT"; 100 | } 101 | const char* Chunk::getPluginVersion() const noexcept 102 | { 103 | return "1.0"; 104 | } 105 | 106 | void Chunk::destroy() noexcept 107 | { 108 | delete this; 109 | } 110 | 111 | void Chunk::setPluginNamespace(const char* pluginNamespace) noexcept 112 | { 113 | _s_plugin_namespace = pluginNamespace; 114 | } 115 | 116 | const char* Chunk::getPluginNamespace() const noexcept 117 | { 118 | return _s_plugin_namespace.c_str(); 119 | } 120 | 121 | DataType Chunk::getOutputDataType(int index, 122 | const nvinfer1::DataType* inputTypes, 123 | int nbInputs) const noexcept 124 | { 125 | assert(index == 0 || index == 1); 126 | return DataType::kFLOAT; 127 | } 128 | 129 | bool Chunk::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept 130 | { 131 | return false; 132 | } 133 | 134 | bool Chunk::canBroadcastInputAcrossBatch(int inputIndex) const noexcept 135 | { 136 | return false; 137 | } 138 | 139 | void Chunk::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) {} 140 | 141 | void Chunk::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 142 | { 143 | _n_size_split = in->dims.d[0] / 2 * in->dims.d[1] * in->dims.d[2] *sizeof(float); 144 | printf("Chunk in configuring %d, %d\n", _n_size_split, _n_size_split/sizeof(float)); 145 | } 146 | void Chunk::detachFromContext() {} 147 | 148 | bool Chunk::supportsFormat(DataType type, PluginFormat format) const noexcept 149 | { 150 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 151 | } 152 | 153 | void Chunk::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 154 | { 155 | 156 | } 157 | 158 | // Clone the plugin 159 | IPluginV2* Chunk::clone() const noexcept 160 | { 161 | Chunk *p = new Chunk(); 162 | p->_n_size_split = _n_size_split; 163 | p->setPluginNamespace(_s_plugin_namespace.c_str()); 164 | return p; 165 | } 166 | 167 | //---------------------------- 168 | PluginFieldCollection ChunkPluginCreator::_fc{}; 169 | std::vector ChunkPluginCreator::_vec_plugin_attributes; 170 | 171 | ChunkPluginCreator::ChunkPluginCreator() 172 | { 173 | _vec_plugin_attributes.clear(); 174 | _fc.nbFields = _vec_plugin_attributes.size(); 175 | _fc.fields = _vec_plugin_attributes.data(); 176 | } 177 | 178 | const char* ChunkPluginCreator::getPluginName() const noexcept 179 | { 180 | return "CHUNK_TRT"; 181 | } 182 | 183 | const char* ChunkPluginCreator::getPluginVersion() const noexcept 184 | { 185 | return "1.0"; 186 | } 187 | 188 | const PluginFieldCollection* ChunkPluginCreator::getFieldNames()noexcept 189 | { 190 | return &_fc; 191 | } 192 | 193 | IPluginV2* ChunkPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept 194 | { 195 | Chunk* obj = new Chunk(); 196 | obj->setPluginNamespace(_s_name_space.c_str()); 197 | return obj; 198 | } 199 | 200 | IPluginV2* ChunkPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept 201 | { 202 | Chunk* obj = new Chunk(serialData,serialLength); 203 | obj->setPluginNamespace(_s_name_space.c_str()); 204 | return obj; 205 | } 206 | 207 | void ChunkPluginCreator::setPluginNamespace(const char* libNamespace)noexcept 208 | { 209 | _s_name_space = libNamespace; 210 | } 211 | 212 | const char* ChunkPluginCreator::getPluginNamespace() const noexcept 213 | { 214 | return _s_name_space.c_str(); 215 | } 216 | }//namespace nvinfer1 217 | -------------------------------------------------------------------------------- /modules/chunk.h: -------------------------------------------------------------------------------- 1 | #ifndef CHUNK_H_ 2 | #define CHUNK_H_ 3 | 4 | //#include "NvInfer.h" 5 | //#include "NvInferPlugin.h" 6 | //#include "NvInferRuntimeCommon.h" 7 | //#include 8 | //#include 9 | //#include 10 | //#include 11 | //#include 12 | //#include 13 | //#include 14 | 15 | #include 16 | #include 17 | #include "NvInfer.h" 18 | 19 | namespace nvinfer1 20 | { 21 | class Chunk : public IPluginV2 22 | { 23 | public: 24 | Chunk(); 25 | Chunk(const void* buffer, size_t length); 26 | ~Chunk(); 27 | int getNbOutputs()const noexcept override; 28 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept override; 29 | int initialize()noexcept override; 30 | void terminate()noexcept override; 31 | size_t getWorkspaceSize(int maxBatchSize) const noexcept override; 32 | // int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)noexcept override; 33 | 34 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 35 | cudaStream_t stream) noexcept override; 36 | 37 | size_t getSerializationSize() const noexcept override; 38 | void serialize(void* buffer) const noexcept override; 39 | const char* getPluginType() const noexcept override; 40 | const char* getPluginVersion() const noexcept override; 41 | void destroy()noexcept override; 42 | void setPluginNamespace(const char* pluginNamespace)noexcept override; 43 | const char* getPluginNamespace() const noexcept override; 44 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept; 45 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept; 46 | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept; 47 | void attachToContext( 48 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator); 49 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput); 50 | void detachFromContext(); 51 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const 52 | { 53 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 54 | } 55 | IPluginV2* clone() const noexcept override; 56 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 57 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 58 | 59 | private: 60 | std::string _s_plugin_namespace; 61 | int _n_size_split; 62 | }; 63 | 64 | class ChunkPluginCreator : public IPluginCreator 65 | { 66 | public: 67 | ChunkPluginCreator(); 68 | ~ChunkPluginCreator() override = default; 69 | const char* getPluginName()const noexcept override; 70 | const char* getPluginVersion() const noexcept override; 71 | const PluginFieldCollection* getFieldNames()noexcept override; 72 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)noexcept override; 73 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept override; 74 | void setPluginNamespace(const char* libNamespace)noexcept override; 75 | const char* getPluginNamespace() const noexcept override; 76 | private: 77 | std::string _s_name_space; 78 | static PluginFieldCollection _fc; 79 | static std::vector _vec_plugin_attributes; 80 | }; 81 | 82 | }//nampespace nvinfer1 83 | 84 | 85 | #endif 86 | 87 | 88 | -------------------------------------------------------------------------------- /modules/class_detector.cpp: -------------------------------------------------------------------------------- 1 | #include "class_detector.h" 2 | #include "class_yolo_detector.hpp" 3 | 4 | class Detector::Impl 5 | { 6 | public: 7 | Impl() {} 8 | 9 | ~Impl(){} 10 | 11 | YoloDectector _detector; 12 | }; 13 | 14 | Detector::Detector() 15 | { 16 | _impl = new Impl(); 17 | } 18 | 19 | Detector::~Detector() 20 | { 21 | if (_impl) 22 | { 23 | delete _impl; 24 | _impl = nullptr; 25 | } 26 | } 27 | 28 | void Detector::init(const Config &config) 29 | { 30 | _impl->_detector.init(config); 31 | } 32 | 33 | void Detector::dump_profiling(void) 34 | { 35 | _impl->_detector.dump_profiling(); 36 | } 37 | 38 | void Detector::detect(const std::vector &mat_image, std::vector &vec_batch_result, const bool cuda) 39 | { 40 | _impl->_detector.detect(mat_image, vec_batch_result, cuda); 41 | } 42 | 43 | void Detector::save_image(cv::Mat &img, const std::string &dir, const std::string &name) 44 | { 45 | _impl->_detector.save_image(img, dir, name); 46 | } 47 | 48 | void Detector::segment(const std::vector &mat_image, std::string filename) 49 | { 50 | _impl->_detector.segment(mat_image, filename); 51 | } 52 | 53 | void Detector::regress(const std::vector &mat_image, std::vector &vec_batch_result, std::string filename) 54 | { 55 | _impl->_detector.regress(mat_image, vec_batch_result, filename); 56 | } 57 | 58 | void Detector::draw_BBox(cv::Mat &img,Result result) 59 | { 60 | _impl->_detector.draw_BBox(img, result); 61 | } 62 | -------------------------------------------------------------------------------- /modules/class_detector.h: -------------------------------------------------------------------------------- 1 | #ifndef CLASS_DETECTOR_H_ 2 | #define CLASS_DETECTOR_H_ 3 | 4 | #include "API.h" 5 | #include 6 | #include 7 | 8 | struct Result 9 | { 10 | int id = -1; 11 | float prob = 0.f; 12 | cv::Rect rect; 13 | }; 14 | 15 | using BatchResult = std::vector; 16 | 17 | enum ModelType 18 | { 19 | YOLOV3, 20 | YOLOV4, 21 | YOLOV5 22 | }; 23 | 24 | enum Precision 25 | { 26 | INT8 = 0, 27 | FP16, 28 | FP32 29 | }; 30 | 31 | struct Config 32 | { 33 | std::string file_model_cfg = "configs/yolov4.cfg"; 34 | 35 | std::string file_model_weights = "configs/yolov4.weights"; 36 | 37 | float detect_thresh = 0.9; 38 | 39 | ModelType net_type = YOLOV4; 40 | 41 | Precision inference_precison = FP32; 42 | 43 | int gpu_id = 0; 44 | 45 | std::string calibration_image_list_file_txt = "configs/calibration_images.txt"; 46 | int batch = 0; 47 | int width = 0; 48 | int height = 0; 49 | int dla = -1; 50 | }; 51 | 52 | class API Detector 53 | { 54 | public: 55 | explicit Detector(); 56 | 57 | ~Detector(); 58 | 59 | void init(const Config &config); 60 | void save_image(cv::Mat &img, const std::string &dir, const std::string &name); 61 | void segment(const std::vector &mat_image, std::string filename); 62 | void regress(const std::vector &mat_image, std::vector &vec_batch_result, std::string filename); 63 | void detect(const std::vector &mat_image, std::vector &vec_batch_result, const bool cuda); 64 | void dump_profiling(void); 65 | void draw_BBox(cv::Mat &img, Result result); 66 | private: 67 | 68 | Detector(const Detector &); 69 | const Detector &operator =(const Detector &); 70 | class Impl; 71 | Impl *_impl; 72 | }; 73 | 74 | #endif // !CLASS_QH_DETECTOR_H_ 75 | -------------------------------------------------------------------------------- /modules/colormap.hpp: -------------------------------------------------------------------------------- 1 | #define MAX_DISTANCE 120 2 | const unsigned char jet_colormap[MAX_DISTANCE][3] = { 3 | {0, 0, 127}, 4 | {0, 0, 137}, 5 | {0, 0, 146}, 6 | {0, 0, 156}, 7 | {0, 0, 166}, 8 | {0, 0, 176}, 9 | {0, 0, 185}, 10 | {0, 0, 195}, 11 | {0, 0, 205}, 12 | {0, 0, 215}, 13 | {0, 0, 224}, 14 | {0, 0, 234}, 15 | {0, 0, 244}, 16 | {0, 0, 254}, 17 | {0, 0, 255}, 18 | {0, 1, 255}, 19 | {0, 9, 255}, 20 | {0, 18, 255}, 21 | {0, 26, 255}, 22 | {0, 35, 255}, 23 | {0, 43, 255}, 24 | {0, 52, 255}, 25 | {0, 61, 255}, 26 | {0, 69, 255}, 27 | {0, 78, 255}, 28 | {0, 86, 255}, 29 | {0, 95, 255}, 30 | {0, 103, 255}, 31 | {0, 112, 255}, 32 | {0, 121, 255}, 33 | {0, 129, 255}, 34 | {0, 138, 255}, 35 | {0, 146, 255}, 36 | {0, 155, 255}, 37 | {0, 163, 255}, 38 | {0, 172, 255}, 39 | {0, 181, 255}, 40 | {0, 189, 255}, 41 | {0, 198, 255}, 42 | {0, 206, 255}, 43 | {0, 215, 255}, 44 | {0, 223, 251}, 45 | {2, 232, 244}, 46 | {9, 241, 237}, 47 | {16, 249, 230}, 48 | {23, 255, 223}, 49 | {30, 255, 216}, 50 | {36, 255, 209}, 51 | {43, 255, 202}, 52 | {50, 255, 195}, 53 | {57, 255, 189}, 54 | {64, 255, 182}, 55 | {71, 255, 175}, 56 | {78, 255, 168}, 57 | {85, 255, 161}, 58 | {92, 255, 154}, 59 | {99, 255, 147}, 60 | {106, 255, 140}, 61 | {113, 255, 133}, 62 | {119, 255, 126}, 63 | {126, 255, 119}, 64 | {133, 255, 113}, 65 | {140, 255, 106}, 66 | {147, 255, 99}, 67 | {154, 255, 92}, 68 | {161, 255, 85}, 69 | {168, 255, 78}, 70 | {175, 255, 71}, 71 | {182, 255, 64}, 72 | {189, 255, 57}, 73 | {195, 255, 50}, 74 | {202, 255, 43}, 75 | {209, 255, 36}, 76 | {216, 255, 30}, 77 | {223, 255, 23}, 78 | {230, 255, 16}, 79 | {237, 255, 9}, 80 | {244, 248, 2}, 81 | {251, 240, 0}, 82 | {255, 232, 0}, 83 | {255, 224, 0}, 84 | {255, 216, 0}, 85 | {255, 208, 0}, 86 | {255, 200, 0}, 87 | {255, 192, 0}, 88 | {255, 184, 0}, 89 | {255, 176, 0}, 90 | {255, 168, 0}, 91 | {255, 161, 0}, 92 | {255, 153, 0}, 93 | {255, 145, 0}, 94 | {255, 137, 0}, 95 | {255, 129, 0}, 96 | {255, 121, 0}, 97 | {255, 113, 0}, 98 | {255, 105, 0}, 99 | {255, 97, 0}, 100 | {255, 89, 0}, 101 | {255, 81, 0}, 102 | {255, 73, 0}, 103 | {255, 65, 0}, 104 | {255, 57, 0}, 105 | {255, 49, 0}, 106 | {255, 41, 0}, 107 | {255, 34, 0}, 108 | {255, 26, 0}, 109 | {254, 18, 0}, 110 | {244, 10, 0}, 111 | {234, 2, 0}, 112 | {224, 0, 0}, 113 | {215, 0, 0}, 114 | {205, 0, 0}, 115 | {195, 0, 0}, 116 | {185, 0, 0}, 117 | {176, 0, 0}, 118 | {166, 0, 0}, 119 | {156, 0, 0}, 120 | {146, 0, 0}, 121 | {137, 0, 0}, 122 | {127, 0, 0} 123 | }; 124 | 125 | unsigned char magma_colormap[MAX_DISTANCE][3] = { 126 | {251, 252, 191}, 127 | {251, 249, 187}, 128 | {252, 245, 183}, 129 | {252, 241, 179}, 130 | {252, 238, 176}, 131 | {252, 234, 172}, 132 | {252, 230, 168}, 133 | {253, 225, 163}, 134 | {253, 221, 159}, 135 | {253, 218, 156}, 136 | {253, 214, 152}, 137 | {253, 210, 149}, 138 | {253, 207, 146}, 139 | {254, 203, 142}, 140 | {254, 198, 137}, 141 | {254, 194, 134}, 142 | {254, 190, 131}, 143 | {254, 187, 128}, 144 | {254, 183, 125}, 145 | {254, 179, 123}, 146 | {254, 174, 118}, 147 | {254, 170, 116}, 148 | {254, 166, 113}, 149 | {253, 162, 111}, 150 | {253, 159, 108}, 151 | {253, 155, 106}, 152 | {253, 151, 104}, 153 | {252, 146, 101}, 154 | {252, 142, 99}, 155 | {251, 138, 98}, 156 | {251, 134, 96}, 157 | {250, 130, 95}, 158 | {250, 127, 94}, 159 | {249, 123, 93}, 160 | {248, 117, 92}, 161 | {247, 113, 91}, 162 | {246, 110, 91}, 163 | {245, 106, 91}, 164 | {243, 103, 91}, 165 | {242, 99, 92}, 166 | {239, 94, 93}, 167 | {238, 91, 94}, 168 | {236, 88, 95}, 169 | {234, 85, 96}, 170 | {231, 82, 98}, 171 | {229, 80, 99}, 172 | {226, 77, 101}, 173 | {222, 74, 103}, 174 | {220, 72, 105}, 175 | {217, 70, 106}, 176 | {214, 68, 108}, 177 | {211, 66, 109}, 178 | {208, 65, 111}, 179 | {203, 62, 113}, 180 | {200, 61, 114}, 181 | {197, 60, 116}, 182 | {194, 58, 117}, 183 | {190, 57, 118}, 184 | {187, 56, 119}, 185 | {184, 55, 120}, 186 | {179, 53, 122}, 187 | {176, 52, 123}, 188 | {172, 51, 123}, 189 | {169, 50, 124}, 190 | {166, 49, 125}, 191 | {163, 48, 126}, 192 | {159, 47, 126}, 193 | {154, 45, 127}, 194 | {151, 44, 127}, 195 | {148, 43, 128}, 196 | {145, 42, 128}, 197 | {141, 41, 128}, 198 | {138, 40, 129}, 199 | {133, 38, 129}, 200 | {130, 37, 129}, 201 | {127, 36, 129}, 202 | {124, 35, 129}, 203 | {121, 34, 129}, 204 | {118, 33, 129}, 205 | {115, 31, 129}, 206 | {110, 30, 129}, 207 | {107, 28, 128}, 208 | {104, 27, 128}, 209 | {101, 26, 128}, 210 | {97, 24, 127}, 211 | {94, 23, 127}, 212 | {90, 21, 126}, 213 | {87, 20, 125}, 214 | {83, 19, 124}, 215 | {80, 18, 123}, 216 | {77, 17, 122}, 217 | {74, 16, 121}, 218 | {71, 15, 119}, 219 | {66, 15, 116}, 220 | {62, 15, 114}, 221 | {59, 15, 111}, 222 | {55, 15, 108}, 223 | {52, 16, 104}, 224 | {48, 16, 101}, 225 | {45, 16, 96}, 226 | {40, 17, 89}, 227 | {37, 17, 85}, 228 | {34, 17, 80}, 229 | {31, 17, 75}, 230 | {28, 16, 70}, 231 | {26, 16, 65}, 232 | {22, 14, 58}, 233 | {20, 13, 53}, 234 | {17, 12, 49}, 235 | {15, 11, 44}, 236 | {13, 10, 40}, 237 | {11, 8, 36}, 238 | {9, 7, 31}, 239 | {6, 5, 25}, 240 | {4, 4, 21}, 241 | {3, 3, 17}, 242 | {2, 2, 13}, 243 | {1, 1, 9}, 244 | {0, 0, 6}, 245 | {0, 0, 3}, 246 | }; 247 | -------------------------------------------------------------------------------- /modules/detect.cu: -------------------------------------------------------------------------------- 1 | //sys 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | //my 11 | #include "detect.h" 12 | 13 | #define NV_CUDA_CHECK(status) \ 14 | { \ 15 | if (status != 0) \ 16 | { \ 17 | std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ 18 | << " at line " << __LINE__ << std::endl; \ 19 | abort(); \ 20 | } \ 21 | } 22 | 23 | namespace nvinfer1 24 | { 25 | Detect::Detect() 26 | { 27 | } 28 | 29 | Detect::Detect(const void* data, size_t length) 30 | { 31 | const char *d = reinterpret_cast(data), *a = d; 32 | read(d,_n_anchor); 33 | read(d,_n_classes); 34 | read(d,_n_grid_h); 35 | read(d, _n_grid_w); 36 | read(d, _n_output_size); 37 | //printf("anchor:%d,classes:%d,gh:%d,gw:%d,size:%d\n", _n_anchor, _n_classes, _n_grid_h, _n_grid_w, _n_output_size); 38 | assert(d == a + length); 39 | } 40 | 41 | Detect::Detect(const uint32_t n_anchor_, const uint32_t n_classes_, 42 | const uint32_t n_grid_h_, const uint32_t n_grid_w_/*, 43 | const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/): 44 | _n_anchor(n_anchor_), 45 | _n_classes(n_classes_), 46 | _n_grid_h(n_grid_h_), 47 | _n_grid_w(n_grid_w_) 48 | { 49 | _n_output_size = (5 + _n_classes)*_n_anchor*_n_grid_h*_n_grid_w; 50 | } 51 | Detect::~Detect() 52 | {} 53 | 54 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 55 | 56 | __global__ void gpu_detect_layer(const float *input_, 57 | float* output_, 58 | const uint32_t n_grid_h_, 59 | const uint32_t n_grid_w_, 60 | const uint32_t n_classes_, 61 | const uint32_t n_anchor_) 62 | { 63 | uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x; 64 | uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y; 65 | uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z; 66 | 67 | if ((x_id >= n_grid_w_) || (y_id >= n_grid_h_) || (z_id >= n_anchor_)) 68 | { 69 | return; 70 | } 71 | // printf("grid_h:%d,grid_w:%d,class:%d,anchor:%d\n", n_grid_h_, n_grid_w_, n_classes_, n_anchor_); 72 | const int numGridCells = n_grid_h_ * n_grid_w_; 73 | const int bbindex = y_id * n_grid_w_ + x_id; 74 | 75 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)] 76 | = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)])-0.5f; 77 | 78 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)] 79 | = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)])-0.5f; 80 | 81 | float w = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)]); 82 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)] 83 | = w*w; 84 | 85 | float h = 2.f* sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)]); 86 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)] 87 | = h*h; 88 | 89 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)] 90 | = sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)]); 91 | for (uint32_t i = 0; i < n_classes_; ++i) 92 | { 93 | output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))] 94 | = sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))]); 95 | } 96 | } 97 | 98 | cudaError_t cuda_detect_layer(const void* input_, 99 | void* output_, 100 | const uint32_t& batch_size_, 101 | const uint32_t& grid_h_, 102 | const uint32_t& grid_w_, 103 | const uint32_t& n_classes_, 104 | const uint32_t& n_anchor_, 105 | uint64_t n_output_size_, 106 | cudaStream_t stream_) 107 | { 108 | dim3 threads_per_block(16, 16, 4); 109 | dim3 number_of_blocks((grid_w_ / threads_per_block.x) + 1, 110 | (grid_h_ / threads_per_block.y) + 1, 111 | (n_anchor_ / threads_per_block.z) + 1); 112 | for (int batch = 0; batch < batch_size_; ++batch) 113 | { 114 | gpu_detect_layer << > >( 115 | reinterpret_cast(input_) + (batch * n_output_size_), 116 | reinterpret_cast(output_) + (batch * n_output_size_), 117 | grid_h_, 118 | grid_w_, 119 | n_classes_, 120 | n_anchor_); 121 | } 122 | return cudaGetLastError(); 123 | } 124 | 125 | int Detect::enqueue(int batchSize, 126 | const void* const* inputs, 127 | void* const* outputs, 128 | void* workspace, 129 | cudaStream_t stream) noexcept 130 | { 131 | NV_CUDA_CHECK(cuda_detect_layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, _n_classes, _n_anchor, _n_output_size, stream)); 132 | return 0; 133 | } 134 | 135 | 136 | bool Detect::supportsFormat(DataType type, PluginFormat format) const noexcept 137 | { 138 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 139 | } 140 | 141 | void Detect::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 142 | { 143 | 144 | } 145 | 146 | size_t Detect::getSerializationSize() const noexcept 147 | { 148 | return sizeof(_n_anchor) + sizeof(_n_classes) + sizeof(_n_grid_h) + sizeof(_n_grid_w) 149 | + sizeof(_n_output_size); 150 | } 151 | 152 | void Detect::serialize(void *buffer) const noexcept 153 | { 154 | char *d = static_cast(buffer), *a = d; 155 | write(d,_n_anchor); 156 | write(d, _n_classes); 157 | write(d, _n_grid_h); 158 | write(d, _n_grid_w); 159 | write(d, _n_output_size); 160 | assert(d == a + getSerializationSize()); 161 | } 162 | 163 | void Detect::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 164 | { 165 | 166 | } 167 | IPluginV2* Detect::clone() const noexcept 168 | { 169 | Detect *p = new Detect(_n_anchor,_n_classes,_n_grid_h,_n_grid_w); 170 | p->setPluginNamespace(_s_plugin_namespace.c_str()); 171 | return p; 172 | } 173 | 174 | 175 | // 176 | PluginFieldCollection DetectPluginCreator::_fc{}; 177 | std::vector DetectPluginCreator::_vec_plugin_attributes; 178 | 179 | DetectPluginCreator::DetectPluginCreator() 180 | { 181 | _vec_plugin_attributes.clear(); 182 | _fc.nbFields = _vec_plugin_attributes.size(); 183 | _fc.fields = _vec_plugin_attributes.data(); 184 | } 185 | 186 | const char* DetectPluginCreator::getPluginName() const noexcept 187 | { 188 | return "DETECT_TRT"; 189 | } 190 | 191 | const char* DetectPluginCreator::getPluginVersion() const noexcept 192 | { 193 | return "1.0"; 194 | } 195 | 196 | const PluginFieldCollection* DetectPluginCreator::getFieldNames() noexcept 197 | { 198 | return &_fc; 199 | } 200 | 201 | IPluginV2* DetectPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept 202 | { 203 | Detect* obj = new Detect(); 204 | obj->setPluginNamespace(_s_name_space.c_str()); 205 | return obj; 206 | } 207 | 208 | IPluginV2* DetectPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept 209 | { 210 | Detect* obj = new Detect(serialData, serialLength); 211 | obj->setPluginNamespace(_s_name_space.c_str()); 212 | return obj; 213 | } 214 | 215 | void DetectPluginCreator::setPluginNamespace(const char* libNamespace) noexcept 216 | { 217 | _s_name_space = libNamespace; 218 | } 219 | 220 | const char* DetectPluginCreator::getPluginNamespace() const noexcept 221 | { 222 | return _s_name_space.c_str(); 223 | } 224 | }//end namespace nvinfer1 225 | -------------------------------------------------------------------------------- /modules/detect.h: -------------------------------------------------------------------------------- 1 | #ifndef _DETECT_H_ 2 | #define _DETECT_H_ 3 | 4 | #include 5 | #include 6 | #include "NvInfer.h" 7 | 8 | namespace nvinfer1 9 | { 10 | template 11 | void write(char*& buffer, const T& val) 12 | { 13 | *reinterpret_cast(buffer) = val; 14 | buffer += sizeof(T); 15 | } 16 | 17 | template 18 | void read(const char*& buffer, T& val) 19 | { 20 | val = *reinterpret_cast(buffer); 21 | buffer += sizeof(T); 22 | } 23 | 24 | class Detect :public IPluginV2 25 | { 26 | public: 27 | Detect(); 28 | Detect(const void* data, size_t length); 29 | Detect(const uint32_t n_anchor_, const uint32_t _n_classes_, 30 | const uint32_t n_grid_h_, const uint32_t n_grid_w_/*, 31 | const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/); 32 | ~Detect(); 33 | int getNbOutputs()const noexcept override 34 | { 35 | return 1; 36 | } 37 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override 38 | { 39 | return inputs[0]; 40 | } 41 | int initialize() noexcept override 42 | { 43 | return 0; 44 | } 45 | void terminate() noexcept override 46 | { 47 | } 48 | size_t getWorkspaceSize(int maxBatchSize) const noexcept override 49 | { 50 | return 0; 51 | } 52 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 53 | cudaStream_t stream) noexcept override; 54 | 55 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 56 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 57 | 58 | size_t getSerializationSize() const noexcept override; 59 | void serialize(void* buffer) const noexcept override; 60 | const char* getPluginType() const noexcept override 61 | { 62 | return "DETECT_TRT"; 63 | } 64 | const char* getPluginVersion() const noexcept override 65 | { 66 | return "1.0"; 67 | } 68 | void destroy() noexcept override 69 | { 70 | delete this; 71 | } 72 | void setPluginNamespace(const char* pluginNamespace) noexcept override 73 | { 74 | _s_plugin_namespace = pluginNamespace; 75 | } 76 | const char* getPluginNamespace() const noexcept override 77 | { 78 | return _s_plugin_namespace.c_str(); 79 | } 80 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept 81 | { 82 | return DataType::kFLOAT; 83 | } 84 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept 85 | { 86 | return false; 87 | } 88 | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept 89 | { 90 | return false; 91 | } 92 | void attachToContext( 93 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 94 | {} 95 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) ; 96 | void detachFromContext() 97 | {} 98 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept 99 | { 100 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 101 | } 102 | IPluginV2* clone() const noexcept override; 103 | private: 104 | 105 | uint32_t _n_anchor; 106 | uint32_t _n_classes; 107 | uint32_t _n_grid_h; 108 | uint32_t _n_grid_w; 109 | //uint32_t _n_stride_h; 110 | // uint32_t _n_stride_w; 111 | uint64_t _n_output_size; 112 | std::string _s_plugin_namespace; 113 | }; //end detect 114 | 115 | class DetectPluginCreator : public IPluginCreator 116 | { 117 | public: 118 | DetectPluginCreator(); 119 | ~DetectPluginCreator() override = default; 120 | const char* getPluginName()const noexcept override; 121 | const char* getPluginVersion() const noexcept override; 122 | const PluginFieldCollection* getFieldNames() noexcept override; 123 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; 124 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; 125 | void setPluginNamespace(const char* libNamespace) noexcept override; 126 | const char* getPluginNamespace() const noexcept override; 127 | private: 128 | std::string _s_name_space; 129 | static PluginFieldCollection _fc; 130 | static std::vector _vec_plugin_attributes; 131 | };//end detect creator 132 | 133 | }//end namespace nvinfer1 134 | 135 | 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /modules/ds_image.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | #include "ds_image.h" 26 | #include 27 | 28 | DsImage::DsImage() : 29 | m_Height(0), 30 | m_Width(0), 31 | m_XOffset(0), 32 | m_YOffset(0), 33 | m_ScalingFactor(0.0), 34 | m_RNG(cv::RNG(unsigned(std::time(0)))), 35 | m_ImageName() 36 | { 37 | } 38 | 39 | DsImage::DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW) : 40 | m_Height(0), 41 | m_Width(0), 42 | m_XOffset(0), 43 | m_YOffset(0), 44 | m_ScalingFactor(0.0), 45 | m_RNG(cv::RNG(unsigned(std::time(0)))), 46 | m_ImageName() 47 | { 48 | m_OrigImage = mat_image_; 49 | m_Height = m_OrigImage.rows; 50 | m_Width = m_OrigImage.cols; 51 | if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0) 52 | { 53 | std::cout << "empty image !"<< std::endl; 54 | assert(0); 55 | } 56 | if (m_OrigImage.channels() != 3) 57 | { 58 | std::cout << "Non RGB images are not supported "<< std::endl; 59 | assert(0); 60 | } 61 | if ("yolov5" == s_net_type_) 62 | { 63 | // resize the DsImage with scale 64 | float r = std::min(static_cast(inputH) / static_cast(m_Height), static_cast(inputW) / static_cast(m_Width)); 65 | int resizeH = (std::round(m_Height*r)); 66 | int resizeW = (std::round(m_Width*r)); 67 | 68 | // Additional checks for images with non even dims 69 | if ((inputW - resizeW) % 2) resizeW--; 70 | if ((inputH - resizeH) % 2) resizeH--; 71 | assert((inputW - resizeW) % 2 == 0); 72 | assert((inputH - resizeH) % 2 == 0); 73 | 74 | m_XOffset = (inputW - resizeW) / 2; 75 | m_YOffset = (inputH - resizeH) / 2; 76 | 77 | assert(2 * m_XOffset + resizeW == inputW); 78 | assert(2 * m_YOffset + resizeH == inputH); 79 | 80 | // resizing 81 | cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC); 82 | // letterboxing 83 | cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, 84 | m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128)); 85 | // cv::imwrite("letter.jpg", m_LetterboxImage); 86 | // converting to RGB 87 | //cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); 88 | } 89 | else 90 | { 91 | cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC); 92 | // converting to RGB 93 | //cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); 94 | } 95 | 96 | } 97 | DsImage::DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW) : 98 | m_Height(0), 99 | m_Width(0), 100 | m_XOffset(0), 101 | m_YOffset(0), 102 | m_ScalingFactor(0.0), 103 | m_RNG(cv::RNG(unsigned(std::time(0)))), 104 | m_ImageName() 105 | { 106 | m_ImageName = std::experimental::filesystem::path(path).stem().string(); 107 | m_OrigImage = cv::imread(path, cv::IMREAD_UNCHANGED); 108 | m_Height = m_OrigImage.rows; 109 | m_Width = m_OrigImage.cols; 110 | if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0) 111 | { 112 | std::cout << "Unable to open image : " << path << std::endl; 113 | assert(0); 114 | } 115 | 116 | if (m_OrigImage.channels() != 3) 117 | { 118 | std::cout << "Non RGB images are not supported : " << path << std::endl; 119 | assert(0); 120 | } 121 | 122 | if ("yolov5" == s_net_type_) 123 | { 124 | // resize the DsImage with scale 125 | float dim = std::max(m_Height, m_Width); 126 | int resizeH = ((m_Height / dim) * inputH); 127 | int resizeW = ((m_Width / dim) * inputW); 128 | m_ScalingFactor = static_cast(resizeH) / static_cast(m_Height); 129 | float m_ScalingFactorw = static_cast(resizeW) / static_cast(m_Width); 130 | 131 | // Additional checks for images with non even dims 132 | if ((inputW - resizeW) % 2) resizeW--; 133 | if ((inputH - resizeH) % 2) resizeH--; 134 | assert((inputW - resizeW) % 2 == 0); 135 | assert((inputH - resizeH) % 2 == 0); 136 | 137 | m_XOffset = (inputW - resizeW) / 2; 138 | m_YOffset = (inputH - resizeH) / 2; 139 | 140 | assert(2 * m_XOffset + resizeW == inputW); 141 | assert(2 * m_YOffset + resizeH == inputH); 142 | 143 | // resizing 144 | cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC); 145 | // letterboxing 146 | cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, 147 | m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128)); 148 | // cv::imwrite("letter.jpg", m_LetterboxImage); 149 | // converting to RGB 150 | // cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); 151 | } 152 | else 153 | { 154 | cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC); 155 | // converting to RGB 156 | // cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); 157 | } 158 | } 159 | 160 | void DsImage::letterbox(const int& inputH, const int& inputW) 161 | { 162 | //m_OrigImage.copyTo(m_MarkedImage); 163 | m_Height = m_OrigImage.rows; 164 | m_Width = m_OrigImage.cols; 165 | 166 | // resize the DsImage with scale 167 | float dim = std::max(m_Height, m_Width); 168 | int resizeH = ((m_Height / dim) * inputH); 169 | int resizeW = ((m_Width / dim) * inputW); 170 | m_ScalingFactor = static_cast(resizeH) / static_cast(m_Height); 171 | float m_ScalingFactorw = static_cast(resizeW) / static_cast(m_Width); 172 | 173 | // Additional checks for images with non even dims 174 | if ((inputW - resizeW) % 2) resizeW--; 175 | if ((inputH - resizeH) % 2) resizeH--; 176 | assert((inputW - resizeW) % 2 == 0); 177 | assert((inputH - resizeH) % 2 == 0); 178 | 179 | m_XOffset = (inputW - resizeW) / 2; 180 | m_YOffset = (inputH - resizeH) / 2; 181 | 182 | assert(2 * m_XOffset + resizeW == inputW); 183 | assert(2 * m_YOffset + resizeH == inputH); 184 | 185 | // resizing 186 | cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC); 187 | // letterboxing 188 | cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, 189 | m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128)); 190 | // cv::imwrite("letter.jpg", m_LetterboxImage); 191 | // converting to RGB 192 | cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); 193 | } 194 | 195 | 196 | void DsImage::addBBox(BBoxInfo box, const std::string& labelName) 197 | { 198 | m_Bboxes.push_back(box); 199 | const int x = box.box.x1; 200 | const int y = box.box.y1; 201 | const int w = box.box.x2 - box.box.x1; 202 | const int h = box.box.y2 - box.box.y1; 203 | const cv::Scalar color 204 | = cv::Scalar(m_RNG.uniform(0, 255), m_RNG.uniform(0, 255), m_RNG.uniform(0, 255)); 205 | 206 | cv::rectangle(m_MarkedImage, cv::Rect(x, y, w, h), color, 1); 207 | const cv::Size tsize 208 | = cv::getTextSize(labelName, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, 1, nullptr); 209 | cv::rectangle(m_MarkedImage, cv::Rect(x, y, tsize.width + 3, tsize.height + 4), color, -1); 210 | cv::putText(m_MarkedImage, labelName.c_str(), cv::Point(x, y + tsize.height), 211 | cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, cv::Scalar(255, 255, 255), 1); 212 | } 213 | 214 | void DsImage::showImage() const 215 | { 216 | cv::namedWindow(m_ImageName); 217 | cv::imshow(m_ImageName.c_str(), m_MarkedImage); 218 | cv::waitKey(0); 219 | } 220 | 221 | void DsImage::saveImageJPEG(const std::string& dirPath) const 222 | { 223 | cv::imwrite(dirPath + m_ImageName + ".jpeg", m_MarkedImage); 224 | } 225 | std::string DsImage::exportJson() const 226 | { 227 | if (m_Bboxes.empty()) return ""; 228 | std::stringstream json; 229 | json.precision(2); 230 | json << std::fixed; 231 | for (uint32_t i = 0; i < m_Bboxes.size(); ++i) 232 | { 233 | json << "\n{\n"; 234 | json << " \"image_id\" : " << std::stoi(m_ImageName) << ",\n"; 235 | json << " \"category_id\" : " << m_Bboxes.at(i).classId << ",\n"; 236 | json << " \"bbox\" : "; 237 | json << "[" << m_Bboxes.at(i).box.x1 << ", " << m_Bboxes.at(i).box.y1 << ", "; 238 | json << m_Bboxes.at(i).box.x2 - m_Bboxes.at(i).box.x1 << ", " 239 | << m_Bboxes.at(i).box.y2 - m_Bboxes.at(i).box.y1 << "],\n"; 240 | json << " \"score\" : " << m_Bboxes.at(i).prob << "\n"; 241 | if (i != m_Bboxes.size() - 1) 242 | json << "},"; 243 | else 244 | json << "}"; 245 | } 246 | return json.str(); 247 | } -------------------------------------------------------------------------------- /modules/ds_image.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | #ifndef __IMAGE_H__ 26 | #define __IMAGE_H__ 27 | 28 | #include "trt_utils.h" 29 | 30 | struct BBoxInfo; 31 | 32 | class DsImage 33 | { 34 | public: 35 | DsImage(); 36 | DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW); 37 | DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW); 38 | int getImageHeight() const { return m_Height; } 39 | int getImageWidth() const { return m_Width; } 40 | cv::Mat getLetterBoxedImage() const { return m_LetterboxImage; } 41 | cv::Mat getOriginalImage() const { return m_OrigImage; } 42 | std::string getImageName() const { return m_ImageName; } 43 | void addBBox(BBoxInfo box, const std::string& labelName); 44 | void showImage() const; 45 | void saveImageJPEG(const std::string& dirPath) const; 46 | std::string exportJson() const; 47 | void letterbox(const int& inputH, const int& inputW); 48 | private: 49 | int m_Height; 50 | int m_Width; 51 | int m_XOffset; 52 | int m_YOffset; 53 | float m_ScalingFactor; 54 | std::string m_ImagePath; 55 | cv::RNG m_RNG; 56 | std::string m_ImageName; 57 | std::vector m_Bboxes; 58 | 59 | // unaltered original Image 60 | cv::Mat m_OrigImage; 61 | // letterboxed Image given to the network as input 62 | cv::Mat m_LetterboxImage; 63 | // final image marked with the bounding boxes 64 | cv::Mat m_MarkedImage; 65 | }; 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /modules/err.log: -------------------------------------------------------------------------------- 1 | make: *** ターゲットが指定されておらず, makefile も見つかりません. 中止. 2 | -------------------------------------------------------------------------------- /modules/hardswish.cu: -------------------------------------------------------------------------------- 1 | //sys 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | //my 11 | #include "hardswish.h" 12 | 13 | #define NV_CUDA_CHECK(status) \ 14 | { \ 15 | if (status != 0) \ 16 | { \ 17 | std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ 18 | << " at line " << __LINE__ << std::endl; \ 19 | abort(); \ 20 | } \ 21 | } 22 | 23 | namespace nvinfer1 24 | { 25 | Hardswish::Hardswish() 26 | { 27 | cudaDeviceProp prop; 28 | cudaGetDeviceProperties(&prop, 0); 29 | _n_max_thread_pre_block = prop.maxThreadsPerBlock; 30 | // printf("Hardswish():%d\n", _n_max_thread_pre_block); 31 | } 32 | 33 | Hardswish::Hardswish(const void* data, size_t length) 34 | { 35 | const char *d = reinterpret_cast(data), *a = d; 36 | r(d, _n_max_thread_pre_block); 37 | r(d, _n_output_size); 38 | // printf("r:threads:%d,size:%d\n", _n_max_thread_pre_block, _n_output_size); 39 | assert(d == a + length); 40 | } 41 | 42 | Hardswish::~Hardswish() 43 | {} 44 | 45 | __global__ void kernel_hardswish(const float *input_, float *output_, int n_data_size_) 46 | { 47 | int i = threadIdx.x + blockIdx.x * blockDim.x; 48 | if (i >= n_data_size_)return; 49 | if (input_[i] >= 3.0f) 50 | { 51 | output_[i] = input_[i]; 52 | } 53 | else if (input_[i] <= -3.0f) 54 | { 55 | output_[i] = 0.0f; 56 | } 57 | else 58 | { 59 | output_[i] = input_[i] * (input_[i] + 3.0f) / 6.0f; 60 | } 61 | } 62 | 63 | cudaError_t cuda_hardswish_layer(const void* input_, 64 | void* output_, 65 | const int n_batch_size_, 66 | const int n_output_size_, 67 | const int threads_, 68 | cudaStream_t stream_) 69 | { 70 | int n_data_size = n_batch_size_ * n_output_size_; 71 | // printf("cuda_hardswish_layer:%d,size:%d\n", n_batch_size_, n_output_size_); 72 | kernel_hardswish << <(n_data_size + threads_ -1)/threads_, threads_, 0, stream_ >> >( 73 | reinterpret_cast(input_), 74 | reinterpret_cast(output_), 75 | n_data_size); 76 | return cudaGetLastError(); 77 | } 78 | 79 | int Hardswish::enqueue(int batchSize, 80 | const void* const* inputs, 81 | void* const* outputs, 82 | void* workspace, 83 | cudaStream_t stream) noexcept 84 | { 85 | // printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block); 86 | NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size , _n_max_thread_pre_block, stream)); 87 | return 0; 88 | } 89 | 90 | size_t Hardswish::getSerializationSize() const noexcept 91 | { 92 | return sizeof(_n_max_thread_pre_block) +sizeof(_n_output_size); 93 | } 94 | 95 | void Hardswish::serialize(void *buffer) const noexcept 96 | { 97 | char *d = static_cast(buffer), *a = d; 98 | w(d, _n_max_thread_pre_block); 99 | w(d, _n_output_size); 100 | // printf("serialize:%d,%d\n", _n_max_thread_pre_block, _n_output_size); 101 | assert(d == a + getSerializationSize()); 102 | } 103 | 104 | 105 | bool Hardswish::supportsFormat(DataType type, PluginFormat format) const noexcept 106 | { 107 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 108 | } 109 | 110 | void Hardswish::configureWithFormat(const Dims* inputDims, int nbInputs, 111 | const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 112 | { 113 | 114 | } 115 | 116 | 117 | void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept 118 | { 119 | 120 | _n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2]; 121 | // printf("configurePlugin:%d,%d,%d\n", in->dims.d[0], in->dims.d[1], in->dims.d[2]); 122 | } 123 | IPluginV2* Hardswish::clone() const noexcept 124 | { 125 | Hardswish *p = new Hardswish(); 126 | p->setPluginNamespace(_s_plugin_namespace.c_str()); 127 | p->_n_max_thread_pre_block = _n_max_thread_pre_block; 128 | p->_n_output_size = _n_output_size; 129 | // printf("clone:%d,%d\n", _n_max_thread_pre_block, _n_output_size); 130 | return p; 131 | } 132 | 133 | 134 | // 135 | PluginFieldCollection HardswishPluginCreator::_fc{}; 136 | std::vector HardswishPluginCreator::_vec_plugin_attributes; 137 | 138 | HardswishPluginCreator::HardswishPluginCreator() 139 | { 140 | _vec_plugin_attributes.clear(); 141 | _fc.nbFields = _vec_plugin_attributes.size(); 142 | _fc.fields = _vec_plugin_attributes.data(); 143 | } 144 | 145 | const char* HardswishPluginCreator::getPluginName() const noexcept 146 | { 147 | return "HARDSWISH_TRT"; 148 | } 149 | 150 | const char* HardswishPluginCreator::getPluginVersion() const noexcept 151 | { 152 | return "1.0"; 153 | } 154 | 155 | const PluginFieldCollection* HardswishPluginCreator::getFieldNames() noexcept 156 | { 157 | return &_fc; 158 | } 159 | 160 | IPluginV2* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept 161 | { 162 | Hardswish* obj = new Hardswish(); 163 | obj->setPluginNamespace(_s_name_space.c_str()); 164 | return obj; 165 | } 166 | 167 | IPluginV2* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept 168 | { 169 | Hardswish* obj = new Hardswish(serialData, serialLength); 170 | obj->setPluginNamespace(_s_name_space.c_str()); 171 | return obj; 172 | } 173 | 174 | void HardswishPluginCreator::setPluginNamespace(const char* libNamespace) noexcept 175 | { 176 | _s_name_space = libNamespace; 177 | } 178 | 179 | const char* HardswishPluginCreator::getPluginNamespace() const noexcept 180 | { 181 | return _s_name_space.c_str(); 182 | } 183 | }//end namespace nvinfer1 184 | -------------------------------------------------------------------------------- /modules/hardswish.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _HARDSWISH_H_ 3 | #define _HARDSWISH_H_ 4 | 5 | #include 6 | #include 7 | #include "NvInfer.h" 8 | 9 | namespace nvinfer1 10 | { 11 | template 12 | void w(char*& buffer, const T& val) 13 | { 14 | *reinterpret_cast(buffer) = val; 15 | buffer += sizeof(T); 16 | } 17 | 18 | template 19 | void r(const char*& buffer, T& val) 20 | { 21 | val = *reinterpret_cast(buffer); 22 | buffer += sizeof(T); 23 | } 24 | 25 | class Hardswish :public IPluginV2 26 | { 27 | public: 28 | Hardswish(); 29 | Hardswish(const void* data, size_t length); 30 | ~Hardswish(); 31 | int getNbOutputs()const noexcept override 32 | { 33 | return 1; 34 | } 35 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override 36 | { 37 | return inputs[0]; 38 | } 39 | int initialize() noexcept override 40 | { 41 | return 0; 42 | } 43 | void terminate() noexcept override 44 | { 45 | } 46 | size_t getWorkspaceSize(int maxBatchSize) const noexcept override 47 | { 48 | return 0; 49 | } 50 | 51 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 52 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 53 | 54 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 55 | cudaStream_t stream) noexcept override; 56 | 57 | size_t getSerializationSize() const noexcept override; 58 | void serialize(void* buffer) const noexcept override; 59 | const char* getPluginType() const noexcept override 60 | { 61 | return "HARDSWISH_TRT"; 62 | } 63 | const char* getPluginVersion() const noexcept override 64 | { 65 | return "1.0"; 66 | } 67 | void destroy() noexcept override 68 | { 69 | delete this; 70 | } 71 | void setPluginNamespace(const char* pluginNamespace) noexcept override 72 | { 73 | _s_plugin_namespace = pluginNamespace; 74 | } 75 | const char* getPluginNamespace() const noexcept override 76 | { 77 | return _s_plugin_namespace.c_str(); 78 | } 79 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept 80 | { 81 | return DataType::kFLOAT; 82 | } 83 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept 84 | { 85 | return false; 86 | } 87 | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept 88 | { 89 | return false; 90 | } 91 | void attachToContext( 92 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept 93 | {} 94 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept; 95 | void detachFromContext() noexcept 96 | {} 97 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept 98 | { 99 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 100 | } 101 | IPluginV2* clone() const noexcept override; 102 | private: 103 | 104 | uint32_t _n_max_thread_pre_block; 105 | uint32_t _n_output_size; 106 | std::string _s_plugin_namespace; 107 | }; //end detect 108 | 109 | class HardswishPluginCreator : public IPluginCreator 110 | { 111 | public: 112 | HardswishPluginCreator(); 113 | ~HardswishPluginCreator() override = default; 114 | const char* getPluginName()const noexcept override; 115 | const char* getPluginVersion() const noexcept override; 116 | const PluginFieldCollection* getFieldNames() noexcept override; 117 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; 118 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; 119 | void setPluginNamespace(const char* libNamespace) noexcept override; 120 | const char* getPluginNamespace() const noexcept override; 121 | private: 122 | std::string _s_name_space; 123 | static PluginFieldCollection _fc; 124 | static std::vector _vec_plugin_attributes; 125 | };//end detect creator 126 | 127 | }//end namespace nvinfer1 128 | 129 | 130 | 131 | #endif -------------------------------------------------------------------------------- /modules/kernel.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayerV3(const float* input, float* output, const uint32_t grid_h_, 11 | const uint32_t grid_w_, const uint32_t numOutputClasses, 12 | const uint32_t numBBoxes) 13 | { 14 | uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x; 15 | uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y; 16 | uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z; 17 | 18 | if ((x_id >= grid_w_) || (y_id >= grid_h_) || (z_id >= numBBoxes)) 19 | { 20 | return; 21 | } 22 | 23 | const int numGridCells = grid_h_ * grid_w_; 24 | const int bbindex = y_id * grid_w_ + x_id; 25 | 26 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] 27 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]); 28 | 29 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] 30 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]); 31 | 32 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] 33 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]); 34 | 35 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] 36 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]); 37 | 38 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] 39 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 40 | 41 | for (uint32_t i = 0; i < numOutputClasses; ++i) 42 | { 43 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] 44 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 45 | } 46 | } 47 | 48 | 49 | //Scaled YOLOv4 format 50 | //https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982 51 | __global__ void gpuYoloV4Layer(const float* input, float* output, const uint32_t grid_h_, 52 | const uint32_t grid_w_, const uint32_t numOutputClasses, 53 | const uint32_t numBBoxes, float scale_x_y) 54 | { 55 | uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x; 56 | uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y; 57 | uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z; 58 | 59 | if ((x_id >= grid_w_) || (y_id >= grid_h_) || (z_id >= numBBoxes)) 60 | { 61 | return; 62 | } 63 | 64 | const int numGridCells = grid_h_ * grid_w_; 65 | const int bbindex = y_id * grid_w_ + x_id; 66 | //bx 67 | scale_x_y = 1.0; 68 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] 69 | = scale_x_y * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) - 0.5 * (scale_x_y -1); 70 | //by 71 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] 72 | = scale_x_y * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) - 0.5 * (scale_x_y -1); 73 | //bw 74 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] 75 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 4; 76 | //bh 77 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] 78 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 4; 79 | 80 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] 81 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 82 | 83 | for (uint32_t i = 0; i < numOutputClasses; ++i) 84 | { 85 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] 86 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 87 | } 88 | } 89 | 90 | 91 | cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize, 92 | const uint32_t& n_grid_h_,const uint32_t& n_grid_w_, 93 | const uint32_t& numOutputClasses, const uint32_t& numBBoxes, 94 | uint64_t outputSize, const uint32_t &new_coords, cudaStream_t stream) 95 | { 96 | dim3 threads_per_block(16, 16, 4); 97 | dim3 number_of_blocks((n_grid_w_ / threads_per_block.x) + 1, 98 | (n_grid_h_ / threads_per_block.y) + 1, 99 | (numBBoxes / threads_per_block.z) + 1); 100 | // printf("YOLOv3 (V4) \n"); 101 | for (int batch = 0; batch < batchSize; ++batch) 102 | { 103 | if (new_coords) { 104 | gpuYoloV4Layer<<>>( 105 | reinterpret_cast(input) + (batch * outputSize), 106 | reinterpret_cast(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses, 107 | numBBoxes, 2.0); 108 | } else { 109 | gpuYoloLayerV3<<>>( 110 | reinterpret_cast(input) + (batch * outputSize), 111 | reinterpret_cast(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses, 112 | numBBoxes); 113 | } 114 | 115 | } 116 | return cudaGetLastError(); 117 | } 118 | 119 | 120 | cudaError_t cudaYoloV4Layer(const void* input, void* output, const uint32_t& batchSize, 121 | const uint32_t& n_grid_h_,const uint32_t& n_grid_w_, 122 | const uint32_t& numOutputClasses, const uint32_t& numBBoxes, 123 | uint64_t outputSize, const uint32_t &new_coords, const float scale_x_y, cudaStream_t stream) 124 | { 125 | dim3 threads_per_block(16, 16, 4); 126 | dim3 number_of_blocks((n_grid_w_ / threads_per_block.x) + 1, 127 | (n_grid_h_ / threads_per_block.y) + 1, 128 | (numBBoxes / threads_per_block.z) + 1); 129 | // printf("Scaled YOLOv4 \n"); 130 | for (int batch = 0; batch < batchSize; ++batch) 131 | { 132 | gpuYoloV4Layer<<>>( 133 | reinterpret_cast(input) + (batch * outputSize), 134 | reinterpret_cast(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses, 135 | numBBoxes, scale_x_y); 136 | } 137 | return cudaGetLastError(); 138 | } 139 | -------------------------------------------------------------------------------- /modules/mish.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "mish.h" 6 | 7 | namespace nvinfer1 8 | { 9 | MishPlugin::MishPlugin() 10 | { 11 | } 12 | 13 | MishPlugin::~MishPlugin() 14 | { 15 | } 16 | 17 | // create the plugin at runtime from a byte stream 18 | MishPlugin::MishPlugin(const void* data, size_t length) 19 | { 20 | assert(length == sizeof(input_size_)); 21 | input_size_ = *reinterpret_cast(data); 22 | } 23 | 24 | void MishPlugin::serialize(void* buffer) const noexcept 25 | { 26 | *reinterpret_cast(buffer) = input_size_; 27 | } 28 | 29 | size_t MishPlugin::getSerializationSize() const noexcept 30 | { 31 | return sizeof(input_size_); 32 | } 33 | 34 | int MishPlugin::initialize()noexcept 35 | { 36 | return 0; 37 | } 38 | 39 | bool MishPlugin::supportsFormat(DataType type, PluginFormat format) const noexcept 40 | { 41 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 42 | } 43 | 44 | void MishPlugin::configureWithFormat(const Dims* inputDims, int nbInputs, 45 | const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 46 | { 47 | 48 | } 49 | 50 | Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept 51 | { 52 | assert(nbInputDims == 1); 53 | assert(index == 0); 54 | input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; 55 | // Output dimensions 56 | return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]); 57 | } 58 | 59 | // Set plugin namespace 60 | void MishPlugin::setPluginNamespace(const char* pluginNamespace)noexcept 61 | { 62 | mPluginNamespace = pluginNamespace; 63 | } 64 | 65 | const char* MishPlugin::getPluginNamespace() const noexcept 66 | { 67 | return mPluginNamespace; 68 | } 69 | 70 | // Return the DataType of the plugin output at the requested index 71 | DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept 72 | { 73 | return DataType::kFLOAT; 74 | } 75 | 76 | // Return true if output tensor is broadcast across a batch. 77 | bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept 78 | { 79 | return false; 80 | } 81 | 82 | // Return true if plugin can use input that is broadcast across batch without replication. 83 | bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const noexcept 84 | { 85 | return false; 86 | } 87 | 88 | void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept 89 | { 90 | } 91 | 92 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 93 | void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept 94 | { 95 | } 96 | 97 | // Detach the plugin object from its execution context. 98 | void MishPlugin::detachFromContext()noexcept {} 99 | 100 | const char* MishPlugin::getPluginType() const noexcept 101 | { 102 | return "Mish_TRT"; 103 | } 104 | 105 | const char* MishPlugin::getPluginVersion() const noexcept 106 | { 107 | return "1"; 108 | } 109 | 110 | void MishPlugin::destroy()noexcept 111 | { 112 | delete this; 113 | } 114 | 115 | // Clone the plugin 116 | IPluginV2* MishPlugin::clone() const noexcept 117 | { 118 | MishPlugin *p = new MishPlugin(); 119 | p->input_size_ = input_size_; 120 | p->setPluginNamespace(mPluginNamespace); 121 | return p; 122 | } 123 | 124 | __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} 125 | 126 | __device__ float softplus_kernel(float x, float threshold = 20) 127 | { 128 | if (x > threshold) return x; // too large 129 | else if (x < -threshold) return expf(x); // too small 130 | return logf(expf(x) + 1); 131 | } 132 | 133 | __global__ void mish_kernel(const float *input, float *output, int num_elem) 134 | { 135 | 136 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 137 | if (idx >= num_elem) return; 138 | 139 | //float t = exp(input[idx]); 140 | //if (input[idx] > 20.0) { 141 | // t *= t; 142 | // output[idx] = (t - 1.0) / (t + 1.0); 143 | //} else { 144 | // float tt = t * t; 145 | // output[idx] = (tt + 2.0 * t) / (tt + 2.0 * t + 2.0); 146 | //} 147 | //output[idx] *= input[idx]; 148 | output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx])); 149 | } 150 | 151 | void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) 152 | { 153 | int block_size = thread_count_; 154 | int grid_size = (input_size_ * batchSize + block_size - 1) / block_size; 155 | mish_kernel<<>>(inputs[0], output, input_size_ * batchSize); 156 | } 157 | 158 | int MishPlugin::enqueue(int batchSize, 159 | const void* const* inputs, 160 | void* const* outputs, 161 | void* workspace, 162 | cudaStream_t stream) noexcept 163 | { 164 | //assert(batchSize == 1); 165 | //GPU 166 | //CUDA_CHECK(cudaStreamSynchronize(stream)); 167 | forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize); 168 | return 0; 169 | } 170 | 171 | PluginFieldCollection MishPluginCreator::mFC{}; 172 | std::vector MishPluginCreator::mPluginAttributes; 173 | 174 | MishPluginCreator::MishPluginCreator() 175 | { 176 | mPluginAttributes.clear(); 177 | 178 | mFC.nbFields = mPluginAttributes.size(); 179 | mFC.fields = mPluginAttributes.data(); 180 | } 181 | 182 | const char* MishPluginCreator::getPluginName() const noexcept 183 | { 184 | return "Mish_TRT"; 185 | } 186 | 187 | const char* MishPluginCreator::getPluginVersion() const noexcept 188 | { 189 | return "1"; 190 | } 191 | 192 | const PluginFieldCollection* MishPluginCreator::getFieldNames()noexcept 193 | { 194 | return &mFC; 195 | } 196 | 197 | IPluginV2* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept 198 | { 199 | MishPlugin* obj = new MishPlugin(); 200 | obj->setPluginNamespace(mNamespace.c_str()); 201 | return obj; 202 | } 203 | 204 | IPluginV2* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept 205 | { 206 | // This object will be deleted when the network is destroyed, which will 207 | // call MishPlugin::destroy() 208 | MishPlugin* obj = new MishPlugin(serialData, serialLength); 209 | obj->setPluginNamespace(mNamespace.c_str()); 210 | return obj; 211 | } 212 | 213 | 214 | void MishPluginCreator::setPluginNamespace(const char* libNamespace)noexcept 215 | { 216 | mNamespace = libNamespace; 217 | } 218 | 219 | const char* MishPluginCreator::getPluginNamespace() const noexcept 220 | { 221 | return mNamespace.c_str(); 222 | } 223 | 224 | 225 | } 226 | 227 | -------------------------------------------------------------------------------- /modules/mish.h: -------------------------------------------------------------------------------- 1 | #ifndef _MISH_PLUGIN_H 2 | #define _MISH_PLUGIN_H 3 | 4 | #include 5 | #include 6 | #include "NvInfer.h" 7 | 8 | 9 | //https://github.com/wang-xinyu/tensorrtx 10 | namespace nvinfer1 11 | { 12 | class MishPlugin: public IPluginV2 13 | { 14 | public: 15 | explicit MishPlugin(); 16 | MishPlugin(const void* data, size_t length); 17 | 18 | ~MishPlugin(); 19 | 20 | int getNbOutputs() const noexcept override 21 | { 22 | return 1; 23 | } 24 | 25 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override; 26 | 27 | int initialize() noexcept override; 28 | 29 | virtual void terminate() noexcept override {} 30 | 31 | virtual size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0;} 32 | 33 | // virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream); 34 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 35 | cudaStream_t stream) noexcept override; 36 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 37 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 38 | 39 | virtual size_t getSerializationSize() const noexcept override; 40 | 41 | virtual void serialize(void* buffer) const noexcept override; 42 | 43 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept { 44 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 45 | } 46 | 47 | const char* getPluginType() const noexcept override; 48 | 49 | const char* getPluginVersion() const noexcept override; 50 | 51 | void destroy() noexcept override; 52 | 53 | IPluginV2* clone() const noexcept override; 54 | 55 | void setPluginNamespace(const char* pluginNamespace) noexcept override; 56 | 57 | const char* getPluginNamespace() const noexcept override; 58 | 59 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept; 60 | 61 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept; 62 | 63 | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept; 64 | 65 | void attachToContext( 66 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept; 67 | 68 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept; 69 | 70 | void detachFromContext()noexcept; 71 | 72 | int input_size_; 73 | private: 74 | void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1); 75 | int thread_count_ = 256; 76 | const char* mPluginNamespace; 77 | }; 78 | 79 | class MishPluginCreator : public IPluginCreator 80 | { 81 | public: 82 | MishPluginCreator(); 83 | 84 | ~MishPluginCreator() override = default; 85 | 86 | const char* getPluginName() const noexcept override; 87 | 88 | const char* getPluginVersion() const noexcept override; 89 | 90 | const PluginFieldCollection* getFieldNames() noexcept override; 91 | 92 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; 93 | 94 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; 95 | 96 | void setPluginNamespace(const char* libNamespace) noexcept override; 97 | 98 | const char* getPluginNamespace() const noexcept override; 99 | 100 | private: 101 | std::string mNamespace; 102 | static PluginFieldCollection mFC; 103 | static std::vector mPluginAttributes; 104 | }; 105 | } 106 | #endif 107 | -------------------------------------------------------------------------------- /modules/plugin_factory.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "plugin_factory.h" 4 | #include "trt_utils.h" 5 | 6 | 7 | /******* Yolo Layer V3 *******/ 8 | /*****************************/ 9 | namespace nvinfer1 10 | { 11 | YoloLayer::YoloLayer() 12 | {} 13 | 14 | YoloLayer::YoloLayer(const void* data, size_t length) 15 | { 16 | const char *d = static_cast(data), *a = d; 17 | re(d, m_NumBoxes); 18 | re(d, m_NumClasses); 19 | re(d, _n_grid_h); 20 | re(d, _n_grid_w); 21 | re(d, m_OutputSize); 22 | re(d, m_new_coords); 23 | assert(d = a + length); 24 | } 25 | void YoloLayer::serialize(void* buffer)const noexcept 26 | { 27 | char *d = static_cast(buffer), *a = d; 28 | wr(d, m_NumBoxes); 29 | wr(d, m_NumClasses); 30 | wr(d, _n_grid_h); 31 | wr(d, _n_grid_w); 32 | wr(d, m_OutputSize); 33 | wr(d, m_new_coords); 34 | assert(d == a + getSerializationSize()); 35 | printf("Serialize V3\n"); 36 | } 37 | 38 | bool YoloLayer::supportsFormat(DataType type, PluginFormat format) const noexcept 39 | { 40 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 41 | } 42 | 43 | void YoloLayer::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 44 | { 45 | 46 | } 47 | 48 | IPluginV2* YoloLayer::clone() const noexcept 49 | { 50 | YoloLayer *p = new YoloLayer(m_NumBoxes,m_NumClasses,_n_grid_h,_n_grid_w, m_new_coords); 51 | p->setPluginNamespace(_s_plugin_namespace.c_str()); 52 | return p; 53 | } 54 | 55 | YoloLayer::YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t& new_coords) : 56 | m_NumBoxes(numBoxes), 57 | m_NumClasses(numClasses), 58 | _n_grid_h(grid_h_), 59 | _n_grid_w(grid_w_), 60 | m_new_coords(new_coords) 61 | { 62 | assert(m_NumBoxes > 0); 63 | assert(m_NumClasses > 0); 64 | assert(_n_grid_h > 0); 65 | assert(_n_grid_w > 0); 66 | m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses)); 67 | } 68 | 69 | int YoloLayer::getNbOutputs() const noexcept { return 1; } 70 | 71 | nvinfer1::Dims YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, 72 | int nbInputDims) noexcept 73 | { 74 | assert(index == 0); 75 | assert(nbInputDims == 1); 76 | return inputs[0]; 77 | } 78 | 79 | //void YoloLayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs, 80 | // const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept 81 | //{ 82 | // assert(nbInputs == 1); 83 | // assert(inputDims != nullptr); 84 | //} 85 | 86 | int YoloLayer::initialize() noexcept { return 0; } 87 | 88 | void YoloLayer::terminate() noexcept {} 89 | 90 | size_t YoloLayer::getWorkspaceSize(int maxBatchSize) const noexcept 91 | { 92 | return 0; 93 | } 94 | 95 | int YoloLayer::enqueue(int batchSize, 96 | const void* const* inputs, 97 | void* const* outputs, 98 | void* workspace, 99 | cudaStream_t stream) noexcept 100 | { 101 | NV_CUDA_CHECK(cudaYoloLayerV3(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, m_NumClasses, 102 | m_NumBoxes, m_OutputSize, m_new_coords, stream)); 103 | return 0; 104 | } 105 | 106 | size_t YoloLayer::getSerializationSize()const noexcept 107 | { 108 | return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w) + sizeof(_n_grid_h) + sizeof(m_OutputSize) + sizeof(m_new_coords); 109 | } 110 | 111 | 112 | 113 | 114 | PluginFieldCollection YoloLayerPluginCreator::mFC{}; 115 | std::vector YoloLayerPluginCreator::mPluginAttributes; 116 | 117 | YoloLayerPluginCreator::YoloLayerPluginCreator() 118 | { 119 | mPluginAttributes.clear(); 120 | 121 | mFC.nbFields = mPluginAttributes.size(); 122 | mFC.fields = mPluginAttributes.data(); 123 | 124 | } 125 | 126 | const char* YoloLayerPluginCreator::getPluginName() const noexcept 127 | { 128 | return "YOLO_TRT"; 129 | } 130 | 131 | const char* YoloLayerPluginCreator::getPluginVersion() const noexcept 132 | { 133 | return "1.0"; 134 | } 135 | 136 | const PluginFieldCollection* YoloLayerPluginCreator::getFieldNames()noexcept 137 | { 138 | return &mFC; 139 | } 140 | 141 | IPluginV2* YoloLayerPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept 142 | { 143 | 144 | YoloLayer* obj = new YoloLayer(); 145 | obj->setPluginNamespace(mNamespace.c_str()); 146 | return obj; 147 | } 148 | 149 | IPluginV2* YoloLayerPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept 150 | { 151 | // This object will be deleted when the network is destroyed, which will 152 | // call MishPlugin::destroy() 153 | YoloLayer* obj = new YoloLayer(serialData, serialLength); 154 | obj->setPluginNamespace(mNamespace.c_str()); 155 | return obj; 156 | } 157 | 158 | 159 | void YoloLayerPluginCreator::setPluginNamespace(const char* libNamespace)noexcept 160 | { 161 | mNamespace = libNamespace; 162 | } 163 | 164 | const char* YoloLayerPluginCreator::getPluginNamespace() const noexcept 165 | { 166 | return mNamespace.c_str(); 167 | } 168 | } 169 | 170 | namespace nvinfer1 171 | { 172 | //Scaled YOLOv4 173 | YoloV4Layer::YoloV4Layer() 174 | {} 175 | 176 | YoloV4Layer::YoloV4Layer(const void* data, size_t length) 177 | { 178 | const char *d = static_cast(data), *a = d; 179 | re(d, m_NumBoxes); 180 | re(d, m_NumClasses); 181 | re(d, _n_grid_h); 182 | re(d, _n_grid_w); 183 | re(d, m_OutputSize); 184 | re(d, m_new_coords); 185 | re(d, m_scale_x_y); 186 | assert(d = a + length); 187 | } 188 | void YoloV4Layer::serialize(void* buffer)const noexcept 189 | { 190 | char *d = static_cast(buffer), *a = d; 191 | wr(d, m_NumBoxes); 192 | wr(d, m_NumClasses); 193 | wr(d, _n_grid_h); 194 | wr(d, _n_grid_w); 195 | wr(d, m_OutputSize); 196 | wr(d, m_new_coords); 197 | wr(d, m_scale_x_y); 198 | assert(d == a + getSerializationSize()); 199 | } 200 | 201 | bool YoloV4Layer::supportsFormat(DataType type, PluginFormat format) const noexcept 202 | { 203 | return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); 204 | } 205 | 206 | void YoloV4Layer::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept 207 | { 208 | 209 | } 210 | 211 | IPluginV2* YoloV4Layer::clone() const noexcept 212 | { 213 | YoloV4Layer *p = new YoloV4Layer(m_NumBoxes,m_NumClasses,_n_grid_h,_n_grid_w, m_new_coords, m_scale_x_y); 214 | p->setPluginNamespace(_s_plugin_namespace.c_str()); 215 | return p; 216 | } 217 | 218 | YoloV4Layer::YoloV4Layer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords, const float& scale_x_y) : 219 | m_NumBoxes(numBoxes), 220 | m_NumClasses(numClasses), 221 | _n_grid_h(grid_h_), 222 | _n_grid_w(grid_w_), 223 | m_scale_x_y(scale_x_y), 224 | m_new_coords(new_coords) 225 | { 226 | assert(m_NumBoxes > 0); 227 | assert(m_NumClasses > 0); 228 | assert(_n_grid_h > 0); 229 | assert(_n_grid_w > 0); 230 | assert(m_scale_x_y > 0); 231 | m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses)); 232 | } 233 | 234 | int YoloV4Layer::getNbOutputs() const noexcept { return 1; } 235 | 236 | nvinfer1::Dims YoloV4Layer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, 237 | int nbInputDims) noexcept 238 | { 239 | assert(index == 0); 240 | assert(nbInputDims == 1); 241 | return inputs[0]; 242 | } 243 | 244 | //void YoloV4LayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs, 245 | // const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept 246 | //{ 247 | // assert(nbInputs == 1); 248 | // assert(inputDims != nullptr); 249 | //} 250 | 251 | int YoloV4Layer::initialize() noexcept { return 0; } 252 | 253 | void YoloV4Layer::terminate() noexcept {} 254 | 255 | size_t YoloV4Layer::getWorkspaceSize(int maxBatchSize) const noexcept 256 | { 257 | return 0; 258 | } 259 | 260 | int YoloV4Layer::enqueue(int batchSize, 261 | const void* const* inputs, 262 | void* const* outputs, 263 | void* workspace, 264 | cudaStream_t stream) noexcept 265 | { 266 | NV_CUDA_CHECK(cudaYoloV4Layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, m_NumClasses, 267 | m_NumBoxes, m_OutputSize, m_new_coords, m_scale_x_y, stream)); 268 | return 0; 269 | } 270 | 271 | size_t YoloV4Layer::getSerializationSize()const noexcept 272 | { 273 | printf("Serialize V4\n"); 274 | return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w) + sizeof(_n_grid_h) + sizeof(m_OutputSize) + sizeof(m_scale_x_y) + sizeof(m_new_coords); 275 | } 276 | 277 | 278 | 279 | 280 | PluginFieldCollection YoloV4LayerPluginCreator::mFC{}; 281 | std::vector YoloV4LayerPluginCreator::mPluginAttributes; 282 | 283 | YoloV4LayerPluginCreator::YoloV4LayerPluginCreator() 284 | { 285 | mPluginAttributes.clear(); 286 | 287 | mFC.nbFields = mPluginAttributes.size(); 288 | mFC.fields = mPluginAttributes.data(); 289 | 290 | } 291 | 292 | const char* YoloV4LayerPluginCreator::getPluginName() const noexcept 293 | { 294 | return "YOLOV4_TRT"; 295 | } 296 | 297 | const char* YoloV4LayerPluginCreator::getPluginVersion() const noexcept 298 | { 299 | return "1.0"; 300 | } 301 | 302 | const PluginFieldCollection* YoloV4LayerPluginCreator::getFieldNames()noexcept 303 | { 304 | return &mFC; 305 | } 306 | 307 | IPluginV2* YoloV4LayerPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept 308 | { 309 | 310 | YoloV4Layer* obj = new YoloV4Layer(); 311 | obj->setPluginNamespace(mNamespace.c_str()); 312 | return obj; 313 | } 314 | 315 | IPluginV2* YoloV4LayerPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept 316 | { 317 | // This object will be deleted when the network is destroyed, which will 318 | // call MishPlugin::destroy() 319 | YoloV4Layer* obj = new YoloV4Layer(serialData, serialLength); 320 | obj->setPluginNamespace(mNamespace.c_str()); 321 | return obj; 322 | } 323 | 324 | 325 | void YoloV4LayerPluginCreator::setPluginNamespace(const char* libNamespace)noexcept 326 | { 327 | mNamespace = libNamespace; 328 | } 329 | 330 | const char* YoloV4LayerPluginCreator::getPluginNamespace() const noexcept 331 | { 332 | return mNamespace.c_str(); 333 | } 334 | 335 | 336 | } 337 | -------------------------------------------------------------------------------- /modules/plugin_factory.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef __PLUGIN_LAYER_H__ 27 | #define __PLUGIN_LAYER_H__ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "NvInfer.h" 36 | 37 | #define NV_CUDA_CHECK(status) \ 38 | { \ 39 | if (status != 0) \ 40 | { \ 41 | std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ 42 | << " at line " << __LINE__ << std::endl; \ 43 | abort(); \ 44 | } \ 45 | } 46 | 47 | // Forward declaration of cuda kernels 48 | cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize, 49 | const uint32_t& n_grid_h_, const uint32_t& n_grid_w_, 50 | const uint32_t& numOutputClasses, const uint32_t& numBBoxes, 51 | uint64_t outputSize, const uint32_t& m_new_coords, cudaStream_t stream); 52 | 53 | cudaError_t cudaYoloV4Layer(const void* input, void* output, const uint32_t& batchSize, 54 | const uint32_t& n_grid_h_,const uint32_t& n_grid_w_, 55 | const uint32_t& numOutputClasses, const uint32_t& numBBoxes, 56 | uint64_t outputSize, const uint32_t& m_new_coords, const float scale_x_y, cudaStream_t stream); 57 | //class PluginFactory : public nvinfer1::IPluginFactory 58 | //{ 59 | // 60 | //public: 61 | // PluginFactory(); 62 | // nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, 63 | // size_t serialLength); 64 | // bool isPlugin(const char* name); 65 | // void destroy(); 66 | // 67 | //private: 68 | // static const int m_MaxLeakyLayers = 72; 69 | // static const int m_ReorgStride = 2; 70 | // static constexpr float m_LeakyNegSlope = 0.1f; 71 | // static const int m_NumBoxes = 5; 72 | // static const int m_NumCoords = 4; 73 | // static const int m_NumClasses = 80; 74 | // static const int m_MaxYoloLayers = 3; 75 | // int m_LeakyReLUCount = 0; 76 | // int m_YoloLayerCount = 0; 77 | // nvinfer1::plugin::RegionParameters m_RegionParameters{m_NumBoxes, m_NumCoords, m_NumClasses, 78 | // nullptr}; 79 | // 80 | // struct INvPluginDeleter 81 | // { 82 | // void operator()(nvinfer1::plugin::INvPlugin* ptr) 83 | // { 84 | // if (ptr) 85 | // { 86 | // ptr->destroy(); 87 | // } 88 | // } 89 | // }; 90 | // struct IPluginDeleter 91 | // { 92 | // void operator()(nvinfer1::IPlugin* ptr) 93 | // { 94 | // if (ptr) 95 | // { 96 | // ptr->terminate(); 97 | // } 98 | // } 99 | // }; 100 | // typedef std::unique_ptr unique_ptr_INvPlugin; 101 | // typedef std::unique_ptr unique_ptr_IPlugin; 102 | // 103 | // unique_ptr_INvPlugin m_ReorgLayer; 104 | // unique_ptr_INvPlugin m_RegionLayer; 105 | // unique_ptr_INvPlugin m_LeakyReLULayers[m_MaxLeakyLayers]; 106 | // unique_ptr_IPlugin m_YoloLayers[m_MaxYoloLayers]; 107 | //}; 108 | namespace nvinfer1 109 | { 110 | template 111 | void wr(char*& buffer, const T& val) 112 | { 113 | *reinterpret_cast(buffer) = val; 114 | buffer += sizeof(T); 115 | } 116 | 117 | template 118 | void re(const char*& buffer, T& val) 119 | { 120 | val = *reinterpret_cast(buffer); 121 | buffer += sizeof(T); 122 | } 123 | 124 | class YoloLayer : public IPluginV2 125 | { 126 | public: 127 | explicit YoloLayer(); 128 | YoloLayer(const void* data, size_t length); 129 | YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords); 130 | int getNbOutputs() const noexcept override; 131 | nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, 132 | int nbInputDims)noexcept override; 133 | /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, 134 | int nbOutputs, int maxBatchSize)noexcept override;*/ 135 | 136 | /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, 137 | const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize)noexcept override;*/ 138 | 139 | int initialize()noexcept override; 140 | void terminate()noexcept override; 141 | size_t getWorkspaceSize(int maxBatchSize) const noexcept override; 142 | 143 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 144 | cudaStream_t stream) noexcept override; 145 | 146 | size_t getSerializationSize() const noexcept override; 147 | void serialize(void* buffer) const noexcept override; 148 | 149 | const char* getPluginType() const noexcept override 150 | { 151 | return "YOLO_TRT"; 152 | } 153 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 154 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 155 | 156 | const char* getPluginVersion() const noexcept override 157 | { 158 | return "1.0"; 159 | } 160 | 161 | void setPluginNamespace(const char* pluginNamespace) noexcept override 162 | { 163 | _s_plugin_namespace = pluginNamespace; 164 | } 165 | const char* getPluginNamespace() const noexcept override 166 | { 167 | return _s_plugin_namespace.c_str(); 168 | } 169 | void destroy() noexcept override 170 | { 171 | delete this; 172 | } 173 | IPluginV2* clone() const noexcept override; 174 | private: 175 | 176 | std::string _s_plugin_namespace; 177 | uint32_t m_NumBoxes; 178 | uint32_t m_NumClasses; 179 | uint32_t m_GridSize; 180 | uint64_t m_OutputSize; 181 | uint32_t _n_grid_h; 182 | uint32_t _n_grid_w; 183 | uint32_t m_new_coords; 184 | }; 185 | 186 | 187 | 188 | class YoloLayerPluginCreator : public IPluginCreator 189 | { 190 | public: 191 | YoloLayerPluginCreator(); 192 | 193 | ~YoloLayerPluginCreator() override = default; 194 | 195 | const char* getPluginName() const noexcept override; 196 | 197 | const char* getPluginVersion() const noexcept override; 198 | 199 | const PluginFieldCollection* getFieldNames() noexcept override; 200 | 201 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; 202 | 203 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; 204 | 205 | void setPluginNamespace(const char* libNamespace) noexcept override; 206 | 207 | const char* getPluginNamespace() const noexcept override; 208 | 209 | private: 210 | std::string mNamespace; 211 | static PluginFieldCollection mFC; 212 | static std::vector mPluginAttributes; 213 | }; 214 | 215 | 216 | //Scaled YOLOv4 217 | class YoloV4Layer : public IPluginV2 218 | { 219 | public: 220 | explicit YoloV4Layer(); 221 | YoloV4Layer(const void* data, size_t length); 222 | YoloV4Layer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords, const float& scale_x_y); 223 | int getNbOutputs() const noexcept override; 224 | nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, 225 | int nbInputDims)noexcept override; 226 | /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, 227 | int nbOutputs, int maxBatchSize)noexcept override;*/ 228 | 229 | /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, 230 | const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize)noexcept override;*/ 231 | 232 | int initialize()noexcept override; 233 | void terminate()noexcept override; 234 | size_t getWorkspaceSize(int maxBatchSize) const noexcept override; 235 | 236 | int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, 237 | cudaStream_t stream) noexcept override; 238 | 239 | size_t getSerializationSize() const noexcept override; 240 | void serialize(void* buffer) const noexcept override; 241 | 242 | const char* getPluginType() const noexcept override 243 | { 244 | return "YOLO_TRT"; 245 | } 246 | bool supportsFormat(DataType type, PluginFormat format) const noexcept override; 247 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; 248 | 249 | const char* getPluginVersion() const noexcept override 250 | { 251 | return "1.0"; 252 | } 253 | 254 | void setPluginNamespace(const char* pluginNamespace) noexcept override 255 | { 256 | _s_plugin_namespace = pluginNamespace; 257 | } 258 | const char* getPluginNamespace() const noexcept override 259 | { 260 | return _s_plugin_namespace.c_str(); 261 | } 262 | void destroy() noexcept override 263 | { 264 | delete this; 265 | } 266 | IPluginV2* clone() const noexcept override; 267 | private: 268 | 269 | std::string _s_plugin_namespace; 270 | uint32_t m_NumBoxes; 271 | uint32_t m_NumClasses; 272 | uint32_t m_GridSize; 273 | uint64_t m_OutputSize; 274 | uint32_t _n_grid_h; 275 | uint32_t _n_grid_w; 276 | uint32_t m_new_coords; 277 | float m_scale_x_y; 278 | 279 | }; 280 | 281 | 282 | 283 | class YoloV4LayerPluginCreator : public IPluginCreator 284 | { 285 | public: 286 | YoloV4LayerPluginCreator(); 287 | 288 | ~YoloV4LayerPluginCreator() override = default; 289 | 290 | const char* getPluginName() const noexcept override; 291 | 292 | const char* getPluginVersion() const noexcept override; 293 | 294 | const PluginFieldCollection* getFieldNames() noexcept override; 295 | 296 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; 297 | 298 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; 299 | 300 | void setPluginNamespace(const char* libNamespace) noexcept override; 301 | 302 | const char* getPluginNamespace() const noexcept override; 303 | 304 | private: 305 | std::string mNamespace; 306 | static PluginFieldCollection mFC; 307 | static std::vector mPluginAttributes; 308 | }; 309 | } 310 | #endif // __PLUGIN_LAYER_H__ 311 | -------------------------------------------------------------------------------- /modules/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "preprocess.h" 5 | 6 | #define BLOCK 512 7 | 8 | dim3 cuda_gridsize(size_t n){ 9 | size_t k = (n-1) / BLOCK + 1; 10 | size_t x = k; 11 | size_t y = 1; 12 | if(x > 65535){ 13 | x = ceil(sqrt(k)); 14 | y = (n-1)/(x*BLOCK) + 1; 15 | } 16 | dim3 d; 17 | d.x = x; 18 | d.y = y; 19 | d.z = 1; 20 | return d; 21 | } 22 | 23 | 24 | __global__ void SimpleblobFromImageKernel(int N, float* dst_img, unsigned char* src_img, 25 | int dst_h, int dst_w, int src_h, int src_w, 26 | float stride_h, float stride_w, float norm) 27 | { 28 | int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 29 | 30 | if (index >= N) return; 31 | int chan = 3; 32 | int w = index % dst_w; 33 | int h = index / dst_w; 34 | float centroid_h, centroid_w; 35 | int c; 36 | centroid_h = stride_h * (float)(h + 0.5); 37 | centroid_w = stride_w * (float)(w + 0.5); 38 | int src_h_idx = lroundf(centroid_h)-1; 39 | int src_w_idx = lroundf(centroid_w)-1; 40 | if (src_h_idx<0){src_h_idx=0;} 41 | if (src_w_idx<0){src_w_idx=0;} 42 | index = chan * src_w_idx + chan* src_w * src_h_idx; 43 | 44 | for (c = 0; c < chan; c++) { 45 | int dst_index = w + (dst_w*h) + (dst_w*dst_h*c); 46 | dst_img[dst_index] = (float)src_img[index+c]*norm; 47 | } 48 | } 49 | 50 | void blobFromImageGpu(float *dst, unsigned char*src, int d_w, int d_h, int d_c, 51 | int s_w, int s_h, int s_c, float norm, cudaStream_t stream) 52 | { 53 | int N = d_w * d_h; 54 | float stride_h = (float)s_h / (float)d_h; 55 | float stride_w = (float)s_w / (float)d_w; 56 | 57 | SimpleblobFromImageKernel<<>>(N, dst, src, 58 | d_h, d_w, 59 | s_h, s_w, 60 | stride_h, stride_w, norm); 61 | 62 | } 63 | 64 | -------------------------------------------------------------------------------- /modules/preprocess.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | void blobFromImageGpu(float *dst, unsigned char*src, int d_w, int d_h, int d_c, 9 | int s_w, int s_h, int s_c, float norm, cudaStream_t stream); 10 | -------------------------------------------------------------------------------- /modules/profiler.cpp: -------------------------------------------------------------------------------- 1 | //#include "profiler.h" 2 | #include "profiler.h" 3 | #include 4 | 5 | using namespace nvinfer1; 6 | 7 | 8 | SimpleProfiler::SimpleProfiler(std::string name, 9 | const std::vector& src_profilers) : 10 | m_name(name) 11 | { 12 | float total_time = 0.0; 13 | m_index = 0; 14 | for (const auto& src_profiler : src_profilers) { 15 | for (const auto& rec : src_profiler.m_profile) { 16 | auto it = m_profile.find(rec.first); 17 | 18 | if (it == m_profile.end()) { 19 | m_profile.insert(rec); 20 | } else { 21 | it->second.time += rec.second.time; 22 | it->second.count += rec.second.count; 23 | total_time += rec.second.time; 24 | } 25 | } 26 | } 27 | } 28 | 29 | void SimpleProfiler::reportLayerTime(const char* layerName, float ms) noexcept 30 | { 31 | 32 | m_profile[layerName].count++; 33 | m_profile[layerName].time += ms; 34 | if (m_profile[layerName].min_time == -1.0) { 35 | m_profile[layerName].min_time = ms; 36 | m_profile[layerName].index = m_index; 37 | m_index++; 38 | } else if (m_profile[layerName].min_time > ms) { 39 | m_profile[layerName].min_time = ms; 40 | } 41 | } 42 | 43 | void SimpleProfiler::setProfDict(nvinfer1::ILayer *layer) noexcept 44 | { 45 | std::string name = layer->getName(); 46 | auto t = layer->getType(); 47 | m_layer_dict[name]; 48 | m_layer_dict[name].type = layer->getType(); 49 | if (layer->getType() == nvinfer1::LayerType::kCONVOLUTION) { 50 | nvinfer1::IConvolutionLayer* conv = (nvinfer1::IConvolutionLayer*)layer; 51 | nvinfer1::ITensor* in = layer->getInput(0); 52 | nvinfer1::Dims dim_in = in->getDimensions(); 53 | nvinfer1::ITensor* out = layer->getOutput(0); 54 | nvinfer1::Dims dim_out = out->getDimensions(); 55 | nvinfer1::Dims k_dims = conv->getKernelSizeNd(); 56 | nvinfer1::Dims s_dims = conv->getStrideNd(); 57 | int groups = conv->getNbGroups(); 58 | int stride = s_dims.d[0]; 59 | int kernel = k_dims.d[0]; 60 | m_layer_dict[name].in_c = dim_in.d[0]; 61 | m_layer_dict[name].out_c = dim_out.d[0]; 62 | m_layer_dict[name].w= dim_in.d[2]; 63 | m_layer_dict[name].h = dim_in.d[1]; 64 | m_layer_dict[name].k = kernel;; 65 | m_layer_dict[name].stride = stride; 66 | m_layer_dict[name].groups = groups; 67 | 68 | } 69 | } 70 | 71 | LayerInfo *SimpleProfiler::getILayerFromName(std::string name) noexcept 72 | { 73 | LayerInfo *linfo = NULL; 74 | /* 75 | if (m_layer_dict.find(name) != m_layer_dict.end()) { 76 | linfo = &m_layer_dict[name]; 77 | } 78 | */ 79 | for (auto it = m_layer_dict.begin(); it != m_layer_dict.end(); it++) { 80 | std::string key = it->first; 81 | if (!name.find(key)) { 82 | linfo = &m_layer_dict[key]; 83 | } 84 | } 85 | return linfo; 86 | } 87 | 88 | std::ostream& operator<<(std::ostream& out, SimpleProfiler& value) 89 | { 90 | out << "========== " << value.m_name << " profile ==========" << std::endl; 91 | float totalTime = 0; 92 | std::string layerNameStr = "Operation"; 93 | 94 | int maxLayerNameLength = static_cast(layerNameStr.size()); 95 | for (const auto& elem : value.m_profile) 96 | { 97 | totalTime += elem.second.time; 98 | maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); 99 | } 100 | 101 | auto old_settings = out.flags(); 102 | auto old_precision = out.precision(); 103 | // Output header 104 | { 105 | out << std::setw(maxLayerNameLength) << layerNameStr << " "; 106 | out << std::setw(12) << "Runtime, " 107 | << "%" 108 | << " "; 109 | out << std::setw(12) << "Invocations" 110 | << " "; 111 | out << std::setw(12) << "Runtime[ms]" ; 112 | out << std::setw(12) << "Min Runtime[ms]" << std::endl; 113 | } 114 | int index = value.m_index; 115 | for (int i = 0; i < index; i++) { 116 | for (const auto& elem : value.m_profile){ 117 | if (elem.second.index == i) { 118 | out << i << ":"; 119 | out << std::setw(maxLayerNameLength) << elem.first << ","; 120 | out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.second.time * 100.0F / totalTime) << "%" 121 | << ","; 122 | out << std::setw(12) << elem.second.count << ","; 123 | out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.time << ", "; 124 | out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.min_time << std::endl; 125 | } 126 | } 127 | } 128 | out.flags(old_settings); 129 | out.precision(old_precision); 130 | out << "========== " << value.m_name << " total runtime = " << totalTime << " ms ==========" << std::endl; 131 | out << "Conv Profile" << std::endl; 132 | out << "index, w, h, in_c, out_c, k, stride, groups, mintime[us], name" << std::endl; 133 | 134 | for (int i = 0; i < index; i++) { 135 | for (const auto& elem : value.m_profile){ 136 | if (elem.second.index == i) { 137 | LayerInfo* linfo = value.getILayerFromName(elem.first); 138 | if (linfo) { 139 | if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) { 140 | out << std::setw(4) << std::fixed << i << ", "; 141 | out << std::setw(4) << std::fixed << linfo->w << ", "; 142 | out << std::setw(4) << std::fixed << linfo->h << ", "; 143 | out << std::setw(4) << std::fixed << linfo->in_c << ", "; 144 | out << std::setw(4) << std::fixed << linfo->out_c << ", "; 145 | out << std::setw(4) << std::fixed << linfo->k << ", "; 146 | out << std::setw(4) << std::fixed << linfo->stride << ", "; 147 | out << std::setw(4) << std::fixed << linfo->groups << ", "; 148 | 149 | 150 | 151 | out << std::setw(10) << std::setprecision(4) << elem.second.min_time *1000 << ", "; 152 | out << std::setw(6) << elem.first << "," << std::endl; 153 | } 154 | } 155 | } 156 | } 157 | } 158 | 159 | std::vector chan{8, 16, 24, 32, 48, 64, 80, 96, 128, 160, 192, 256, 320, 384}; 160 | //k=1 conv 161 | int kernel = 1; 162 | out << "###Conv Kernel(1)" <k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) { 177 | if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) { 178 | out << std::setw(10) << std::setprecision(4) << elem.second.min_time *1000; 179 | flg = true; 180 | break; 181 | } 182 | } 183 | } 184 | } 185 | } 186 | out << ", "; 187 | } 188 | out << std::endl; 189 | } 190 | kernel = 3; 191 | out << "###Conv Kernel(" << kernel << ")" <k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) { 206 | if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) { 207 | out << std::setw(10) << std::setprecision(4) << elem.second.min_time *1000; 208 | flg = true; 209 | break; 210 | } 211 | } 212 | } 213 | } 214 | } 215 | out << ", "; 216 | } 217 | out << std::endl; 218 | } 219 | 220 | kernel = 5; 221 | out << "###Conv Kernel(" << kernel << ")" <k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) { 236 | if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) { 237 | out << std::setw(10) << std::setprecision(4) << elem.second.min_time *1000; 238 | flg = true; 239 | break; 240 | } 241 | } 242 | } 243 | } 244 | } 245 | out << ", "; 246 | } 247 | out << std::endl; 248 | } 249 | kernel = 7; 250 | out << "###Conv Kernel(" << kernel << ")" <k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) { 265 | if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) { 266 | out << std::setw(10) << std::setprecision(4) << elem.second.min_time *1000; 267 | flg = true; 268 | break; 269 | } 270 | } 271 | } 272 | } 273 | } 274 | out << ", "; 275 | } 276 | out << std::endl; 277 | } 278 | return out; 279 | } 280 | 281 | 282 | -------------------------------------------------------------------------------- /modules/profiler.h: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_PROFILER_H 2 | #define YOLO_PROFILER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct LayerInfo 11 | { 12 | int in_c; 13 | int out_c; 14 | int w; 15 | int h; 16 | int k; 17 | int stride; 18 | int groups; 19 | nvinfer1::LayerType type; 20 | }; 21 | 22 | 23 | class SimpleProfiler : public nvinfer1::IProfiler 24 | { 25 | public: 26 | struct Record 27 | { 28 | float time{0}; 29 | int count{0}; 30 | float min_time{-1.0}; 31 | int index; 32 | }; 33 | /* 34 | * Construct a profiler 35 | * name: Name of the profiler 36 | * src_profilers: Optionally initialize profiler with data of one or more other profilers 37 | * This is usefull for aggregating results of different profilers 38 | * Aggregation will sum all runtime periods and all invokations for each reported 39 | * layer of all given profilers 40 | */ 41 | SimpleProfiler(std::string name, 42 | const std::vector& src_profilers = std::vector()); 43 | 44 | void reportLayerTime(const char* layerName, float ms) noexcept override; 45 | 46 | void setProfDict(nvinfer1::ILayer *layer) noexcept; 47 | LayerInfo *getILayerFromName(std::string name) noexcept; 48 | friend std::ostream& operator<<(std::ostream& out, SimpleProfiler& value); 49 | 50 | private: 51 | std::string m_name; 52 | std::map m_profile; 53 | int m_index; 54 | std::mapm_layer_dict; 55 | }; 56 | 57 | 58 | 59 | #endif /* JETNET_PROFILER_H */ 60 | -------------------------------------------------------------------------------- /modules/yolo.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef _YOLO_H_ 27 | #define _YOLO_H_ 28 | 29 | #include "calibrator.h" 30 | //#include "plugin_factory.h" 31 | #include "trt_utils.h" 32 | 33 | #include "NvInfer.h" 34 | #include "NvInferPlugin.h" 35 | #include "NvInferRuntimeCommon.h" 36 | #include "cuda_runtime_api.h" 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include "class_timer.hpp" 43 | #include "opencv2/opencv.hpp" 44 | #include "detect.h" 45 | #include "logging.h" 46 | #include "profiler.h" 47 | 48 | /** 49 | * Holds all the file paths required to build a network. 50 | */ 51 | struct NetworkInfo 52 | { 53 | std::string networkType; 54 | std::string configFilePath; 55 | std::string wtsFilePath; 56 | std::string labelsFilePath; 57 | std::string precision; 58 | std::string deviceType; 59 | std::string calibrationTablePath; 60 | std::string enginePath; 61 | std::string inputBlobName; 62 | std::string data_path; 63 | int batch; 64 | int width; 65 | int height; 66 | int dla; 67 | }; 68 | 69 | /** 70 | * Holds information about runtime inference params. 71 | */ 72 | struct InferParams 73 | { 74 | bool printPerfInfo; 75 | bool printPredictionInfo; 76 | std::string calibImages; 77 | std::string calibImagesPath; 78 | float probThresh; 79 | float nmsThresh; 80 | bool prof; 81 | }; 82 | 83 | /** 84 | * Holds information about an output tensor of the yolo network. 85 | */ 86 | struct TensorInfo 87 | { 88 | std::string blobName; 89 | uint32_t stride{0}; 90 | uint32_t stride_h{0}; 91 | uint32_t stride_w{0}; 92 | uint32_t gridSize{0}; 93 | uint32_t grid_h{ 0 }; 94 | uint32_t grid_w{ 0 }; 95 | uint32_t numClasses{0}; 96 | uint32_t numBBoxes{0}; 97 | uint64_t volume{0}; 98 | std::vector masks; 99 | std::vector anchors; 100 | int bindingIndex{-1}; 101 | float* hostBuffer{nullptr}; 102 | bool segmenter{false}; 103 | bool regression{false}; 104 | std::vector colormap; 105 | std::vector names; 106 | bool depth; 107 | }; 108 | 109 | 110 | class Yolo 111 | { 112 | public: 113 | std::string getNetworkType() const { return m_NetworkType; } 114 | float getNMSThresh() const { return m_NMSThresh; } 115 | std::string getClassName(const int& label) const { return m_ClassNames.at(label); } 116 | int getClassId(const int& label) const { return m_ClassIds.at(label); } 117 | uint32_t getInputH() const { return m_InputH; } 118 | uint32_t getInputW() const { return m_InputW; } 119 | uint32_t getNumClasses() const { return static_cast(m_ClassNames.size()); } 120 | bool isPrintPredictions() const { return m_PrintPredictions; } 121 | bool isPrintPerfInfo() const { return m_PrintPerfInfo; } 122 | void preprocess_gpu(unsigned char *src, int w, int h); 123 | void doInference(const unsigned char* input, const uint32_t batchSize); 124 | void doProfiling(const unsigned char* input, const uint32_t batchSize); 125 | std::vector decodeDetections(const int& imageIdx, 126 | const int& imageH, 127 | const int& imageW); 128 | std::vector apply_argmax(const int& imageIdx); 129 | std::vector get_colorlbl(std::vector &argmax); 130 | std::vector get_depthmap(std::vector &argmax) ; 131 | std::vector get_depthmap_from_logistic(const int& imageIdx) ; 132 | cv::Mat get_bev_from_lidar(cv::Mat &rangeImg, cv::Mat &seg); 133 | void get_backprojection(const int& imageIdx, int im_w, int im_h, cv::Mat &seg, std::vector &binfos, cv::Mat &bev); 134 | cv::Mat get_heightmap(const int& imageIdx, int im_w, int im_h); 135 | void get_filtered_bev_from_logistic(const int& imageIdx, int im_w, int im_h, cv::Mat &seg, std::vector &binfos, cv::Mat &bev); 136 | void visualize_vidar_with_pcl(const int& imageIdx, int im_w, int im_h, cv::Mat &seg); 137 | uint32_t *get_detection_colormap(void); 138 | std::vector get_detection_names(int id); 139 | void print_profiling(); 140 | virtual ~Yolo(); 141 | 142 | protected: 143 | Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams); 144 | std::string m_EnginePath; 145 | const std::string m_NetworkType; 146 | const std::string m_ConfigFilePath; 147 | const std::string m_WtsFilePath; 148 | const std::string m_LabelsFilePath; 149 | const std::string m_Precision; 150 | const std::string m_DeviceType; 151 | const std::string m_CalibImages; 152 | const std::string m_CalibImagesFilePath; 153 | std::string m_CalibTableFilePath; 154 | const std::string m_InputBlobName; 155 | std::vector m_OutputTensors; 156 | std::vector> m_configBlocks; 157 | uint32_t m_InputH; 158 | uint32_t m_InputW; 159 | uint32_t m_InputC; 160 | uint64_t m_InputSize; 161 | uint32_t _n_classes = 0; 162 | float _f_depth_multiple = 0; 163 | float _f_width_multiple = 0; 164 | const float m_ProbThresh; 165 | const float m_NMSThresh; 166 | std::vector m_ClassNames; 167 | // Class ids for coco benchmarking 168 | const std::vector m_ClassIds{ 169 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 170 | 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 171 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 172 | 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90}; 173 | const bool m_PrintPerfInfo; 174 | const bool m_PrintPredictions; 175 | // TRT specific members 176 | //Logger glogger; 177 | uint32_t m_BatchSize = 1; 178 | nvinfer1::INetworkDefinition* m_Network; 179 | nvinfer1::IBuilder* m_Builder ; 180 | nvinfer1::IHostMemory* m_ModelStream; 181 | nvinfer1::ICudaEngine* m_Engine; 182 | nvinfer1::IExecutionContext* m_Context; 183 | std::vector m_DeviceBuffers; 184 | int m_InputBindingIndex; 185 | cudaStream_t m_CudaStream; 186 | 187 | SimpleProfiler m_model_profiler; 188 | SimpleProfiler m_host_profiler; 189 | int m_dla; 190 | unsigned char* m_h_img; 191 | unsigned char* m_d_img; 192 | 193 | //PluginFactory* m_PluginFactory; 194 | // std::unique_ptr m_TinyMaxpoolPaddingFormula; 195 | 196 | virtual std::vector decodeTensor(const int imageIdx, const int imageH, 197 | const int imageW, const TensorInfo& tensor) 198 | = 0; 199 | 200 | inline void addBBoxProposal(const float bx, const float by, const float bw, const float bh, 201 | const uint32_t stride, const float scalingFactor, const float xOffset, 202 | const float yOffset, const int maxIndex, const float maxProb, 203 | const uint32_t image_w, const uint32_t image_h, 204 | std::vector& binfo) 205 | { 206 | BBoxInfo bbi; 207 | bbi.box = convertBBoxNetRes(bx, by, bw, bh, stride, m_InputW, m_InputH); 208 | if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) 209 | { 210 | return; 211 | } 212 | // convertBBoxImgRes(scalingFactor, m_InputW,m_InputH,image_w,image_h, bbi.box); 213 | bbi.label = maxIndex; 214 | bbi.prob = maxProb; 215 | bbi.classId = getClassId(maxIndex); 216 | binfo.push_back(bbi); 217 | } 218 | 219 | void calcuate_letterbox_message(const int m_InputH, const int m_InputW, 220 | const int imageH, const int imageW, 221 | float &sh,float &sw, 222 | int &xOffset,int &yOffset) 223 | { 224 | float r = std::min(static_cast(m_InputH) / static_cast(imageH), static_cast(m_InputW) / static_cast(imageW)); 225 | int resizeH = (std::round(imageH*r)); 226 | int resizeW = (std::round(imageW*r)); 227 | 228 | sh = r; 229 | sw = r; 230 | if ((m_InputW - resizeW) % 2) resizeW--; 231 | if ((m_InputH - resizeH) % 2) resizeH--; 232 | assert((m_InputW - resizeW) % 2 == 0); 233 | assert((m_InputH - resizeH) % 2 == 0); 234 | xOffset = (m_InputW - resizeW) / 2; 235 | yOffset = (m_InputH - resizeH) / 2; 236 | } 237 | BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh, 238 | const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH) 239 | { 240 | BBox b; 241 | // Restore coordinates to network input resolution 242 | float x = bx * stride_w_; 243 | float y = by * stride_h_; 244 | 245 | b.x1 = x - bw / 2; 246 | b.x2 = x + bw / 2; 247 | 248 | b.y1 = y - bh / 2; 249 | b.y2 = y + bh / 2; 250 | 251 | b.x1 = clamp(b.x1, 0, netW); 252 | b.x2 = clamp(b.x2, 0, netW); 253 | b.y1 = clamp(b.y1, 0, netH); 254 | b.y2 = clamp(b.y2, 0, netH); 255 | 256 | return b; 257 | } 258 | 259 | inline void cvt_box(const float sh, 260 | const float sw, 261 | const float xOffset, 262 | const float yOffset, 263 | BBox& bbox) 264 | { 265 | //// Undo Letterbox 266 | bbox.x1 -= xOffset; 267 | bbox.x2 -= xOffset; 268 | bbox.y1 -= yOffset; 269 | bbox.y2 -= yOffset; 270 | //// Restore to input resolution 271 | bbox.x1 /= sw; 272 | bbox.x2 /= sw; 273 | bbox.y1 /= sh; 274 | bbox.y2 /= sh; 275 | } 276 | 277 | inline void add_bbox_proposal(const float bx, const float by, const float bw, const float bh, 278 | const uint32_t stride_h_, const uint32_t stride_w_, const float scaleH, const float scaleW, const float xoffset_, const float yoffset, const int maxIndex, const float maxProb, 279 | const uint32_t image_w, const uint32_t image_h, 280 | std::vector& binfo) 281 | { 282 | BBoxInfo bbi; 283 | bbi.box = convert_bbox_res(bx, by, bw, bh, stride_h_, stride_w_, m_InputW, m_InputH); 284 | if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) 285 | { 286 | return; 287 | } 288 | if ("yolov5" == m_NetworkType) 289 | { 290 | cvt_box(scaleH, scaleW, xoffset_, yoffset, bbi.box); 291 | } 292 | else 293 | { 294 | bbi.box.x1 = ((float)bbi.box.x1 / (float)m_InputW)*(float)image_w; 295 | bbi.box.y1 = ((float)bbi.box.y1 / (float)m_InputH)*(float)image_h; 296 | bbi.box.x2 = ((float)bbi.box.x2 / (float)m_InputW)*(float)image_w; 297 | bbi.box.y2 = ((float)bbi.box.y2 / (float)m_InputH)*(float)image_h; 298 | } 299 | 300 | bbi.label = maxIndex; 301 | bbi.prob = maxProb; 302 | bbi.classId = getClassId(maxIndex); 303 | binfo.push_back(bbi); 304 | }; 305 | private: 306 | Logger m_Logger; 307 | void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, 308 | Int8EntropyCalibrator* calibrator = nullptr); 309 | std::vector> parseConfigFile(const std::string cfgFilePath); 310 | void parseConfigBlocks(); 311 | void allocateBuffers(); 312 | bool verifyYoloEngine(); 313 | void destroyNetworkUtils(std::vector& trtWeights); 314 | void writePlanFileToDisk(); 315 | 316 | private: 317 | Timer _timer; 318 | 319 | int _n_yolo_ind = 0; 320 | }; 321 | 322 | #define GRID_H 600 323 | #define GRID_W 400 324 | #endif // _YOLO_H_ 325 | -------------------------------------------------------------------------------- /modules/yolo_config_parser.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef _YOLO_CONFIG_PARSER_ 27 | #define _YOLO_CONFIG_PARSER_ 28 | 29 | #include "yolo.h" 30 | 31 | #include 32 | #include 33 | 34 | // Init to be called at the very beginning to verify all config params are valid 35 | void yoloConfigParserInit(int argc, char** argv); 36 | 37 | NetworkInfo getYoloNetworkInfo(); 38 | NetworkInfo getYoloNetworkInfo1(); 39 | NetworkInfo getYoloNetworkInfo2(); 40 | std::string getDirectoryPath(void); 41 | std::string getVideoPath(void); 42 | int getCameraID(void); 43 | //InferParams getYoloInferParams(); 44 | //uint64_t getSeed(); 45 | //std::string getNetworkType(); 46 | //std::string getPrecision(); 47 | //std::string getTestImages(); 48 | //std::string getTestImagesPath(); 49 | //bool getDecode(); 50 | //bool getDoBenchmark(); 51 | //bool getViewDetections(); 52 | bool getSaveDetections(); 53 | std::string getSaveDetectionsPath(); 54 | //uint32_t getBatchSize(); 55 | //bool getShuffleTestSet(); 56 | bool 57 | get_dont_show_flg(void); 58 | bool 59 | get_prof_flg(void); 60 | std::vector 61 | get_names(void); 62 | std::string 63 | get_dump_path(void); 64 | double 65 | get_score_thresh(void); 66 | bool 67 | get_multi_precision_flg(void); 68 | bool 69 | get_cuda_flg(void); 70 | std::string 71 | get_output_path(void); 72 | std::string 73 | get_target_label(void); 74 | bool 75 | get_lidar_flg(void); 76 | std::string 77 | get_depth_colormap(void); 78 | #endif //_YOLO_CONFIG_PARSER_ 79 | -------------------------------------------------------------------------------- /modules/yoloplugin_lib.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #include "yoloplugin_lib.h" 27 | #include "yolo_config_parser.h" 28 | #include "yolov2.h" 29 | #include "yolov3.h" 30 | 31 | #include 32 | #include 33 | 34 | static void decodeBatchDetections(const YoloPluginCtx* ctx, std::vector& outputs) 35 | { 36 | for (uint32_t p = 0; p < ctx->batchSize; ++p) 37 | { 38 | YoloPluginOutput* out = new YoloPluginOutput; 39 | std::vector binfo = ctx->inferenceNetwork->decodeDetections( 40 | p, ctx->initParams.processingHeight, ctx->initParams.processingWidth); 41 | std::vector remaining; 42 | /*std::vector remaining = nmsAllClasses( 43 | ctx->inferenceNetwork->getNMSThresh(), binfo, ctx->inferenceNetwork->getNumClasses(),);*/ 44 | out->numObjects = remaining.size(); 45 | assert(out->numObjects <= MAX_OBJECTS_PER_FRAME); 46 | for (uint32_t j = 0; j < remaining.size(); ++j) 47 | { 48 | BBoxInfo b = remaining.at(j); 49 | YoloPluginObject obj; 50 | obj.left = static_cast(b.box.x1); 51 | obj.top = static_cast(b.box.y1); 52 | obj.width = static_cast(b.box.x2 - b.box.x1); 53 | obj.height = static_cast(b.box.y2 - b.box.y1); 54 | strcpy(obj.label, ctx->inferenceNetwork->getClassName(b.label).c_str()); 55 | out->object[j] = obj; 56 | 57 | if (ctx->inferParams.printPredictionInfo) 58 | { 59 | printPredictions(b, ctx->inferenceNetwork->getClassName(b.label)); 60 | } 61 | } 62 | outputs.at(p) = out; 63 | } 64 | } 65 | 66 | static void dsPreProcessBatchInput(const std::vector& cvmats, cv::Mat& batchBlob, 67 | const int& processingHeight, const int& processingWidth, 68 | const int& inputH, const int& inputW) 69 | { 70 | 71 | std::vector batch_images( 72 | cvmats.size(), cv::Mat(cv::Size(processingWidth, processingHeight), CV_8UC3)); 73 | for (uint32_t i = 0; i < cvmats.size(); ++i) 74 | { 75 | cv::Mat imageResize, imageBorder, inputImage; 76 | inputImage = *cvmats.at(i); 77 | int maxBorder = std::max(inputImage.size().width, inputImage.size().height); 78 | 79 | assert((maxBorder - inputImage.size().height) % 2 == 0); 80 | assert((maxBorder - inputImage.size().width) % 2 == 0); 81 | 82 | int yOffset = (maxBorder - inputImage.size().height) / 2; 83 | int xOffset = (maxBorder - inputImage.size().width) / 2; 84 | 85 | // Letterbox and resize to maintain aspect ratio 86 | cv::copyMakeBorder(inputImage, imageBorder, yOffset, yOffset, xOffset, xOffset, 87 | cv::BORDER_CONSTANT, cv::Scalar(127.5, 127.5, 127.5)); 88 | cv::resize(imageBorder, imageResize, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC); 89 | batch_images.at(i) = imageResize; 90 | } 91 | 92 | batchBlob = cv::dnn::blobFromImages(batch_images, 1.0, cv::Size(inputW, inputH), 93 | cv::Scalar(0.0, 0.0, 0.0), false); 94 | } 95 | 96 | YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize) 97 | { 98 | char** gArgV = new char*[2]; 99 | gArgV[0] = new char[64]; 100 | gArgV[1] = new char[512]; 101 | strcpy(gArgV[0], "yolo_plugin_ctx"); 102 | strcpy(gArgV[1], std::string("--flagfile=" + initParams->configFilePath).c_str()); 103 | // yoloConfigParserInit(2, gArgV); 104 | 105 | YoloPluginCtx* ctx = new YoloPluginCtx; 106 | ctx->initParams = *initParams; 107 | ctx->batchSize = batchSize; 108 | ctx->networkInfo;// = getYoloNetworkInfo(); 109 | ctx->inferParams;// = getYoloInferParams(); 110 | uint32_t configBatchSize;// = getBatchSize(); 111 | 112 | // Check if config batchsize matches buffer batch size in the pipeline 113 | if (ctx->batchSize != configBatchSize) 114 | { 115 | std::cerr 116 | << "WARNING: Batchsize set in config file overriden by pipeline. New batchsize is " 117 | << ctx->batchSize << std::endl; 118 | int npos = ctx->networkInfo.wtsFilePath.find(".weights"); 119 | assert(npos != std::string::npos 120 | && "wts file file not recognised. File needs to be of '.weights' format"); 121 | std::string dataPath = ctx->networkInfo.wtsFilePath.substr(0, npos); 122 | ctx->networkInfo.enginePath = dataPath + "-" + ctx->networkInfo.precision + "-batch" 123 | + std::to_string(ctx->batchSize) + ".engine"; 124 | } 125 | 126 | if ((ctx->networkInfo.networkType == "yolov2") 127 | || (ctx->networkInfo.networkType == "yolov2-tiny")) 128 | { 129 | ctx->inferenceNetwork = new YoloV2(ctx->networkInfo, ctx->inferParams); 130 | } 131 | else if ((ctx->networkInfo.networkType == "yolov3") 132 | || (ctx->networkInfo.networkType == "yolov3-tiny")) 133 | { 134 | ctx->inferenceNetwork = new YoloV3( ctx->networkInfo, ctx->inferParams); 135 | } 136 | else 137 | { 138 | std::cerr << "ERROR: Unrecognized network type " << ctx->networkInfo.networkType 139 | << std::endl; 140 | std::cerr << "Network Type has to be one among the following : yolov2, yolov2-tiny, yolov3 " 141 | "and yolov3-tiny" 142 | << std::endl; 143 | return nullptr; 144 | } 145 | 146 | delete[] gArgV; 147 | return ctx; 148 | } 149 | 150 | std::vector YoloPluginProcess(YoloPluginCtx* ctx, std::vector& cvmats) 151 | { 152 | assert((cvmats.size() <= ctx->batchSize) && "Image batch size exceeds TRT engines batch size"); 153 | std::vector outputs = std::vector(cvmats.size(), nullptr); 154 | cv::Mat preprocessedImages; 155 | std::chrono::duration preElapsed, inferElapsed, postElapsed; 156 | std::chrono::time_point preStart, preEnd, inferStart, inferEnd, postStart, postEnd; 157 | 158 | if (cvmats.size() > 0) 159 | { 160 | // preStart = std::chrono::high_resolution_clock::now(); 161 | dsPreProcessBatchInput(cvmats, preprocessedImages, ctx->initParams.processingWidth, 162 | ctx->initParams.processingHeight, ctx->inferenceNetwork->getInputH(), 163 | ctx->inferenceNetwork->getInputW()); 164 | // preEnd = std::chrono::high_resolution_clock::now(); 165 | 166 | // inferStart = std::chrono::high_resolution_clock::now(); 167 | ctx->inferenceNetwork->doInference(preprocessedImages.data, cvmats.size()); 168 | // inferEnd = std::chrono::high_resolution_clock::now(); 169 | 170 | // postStart = std::chrono::high_resolution_clock::now(); 171 | decodeBatchDetections(ctx, outputs); 172 | // postEnd = std::chrono::high_resolution_clock::now(); 173 | } 174 | 175 | // Perf calc 176 | if (ctx->inferParams.printPerfInfo) 177 | { 178 | preElapsed 179 | = ((preEnd - preStart) + (preEnd - preStart) / 1000000.0) 180 | * 1000; 181 | inferElapsed = ((inferEnd - inferStart) 182 | + (inferEnd - inferStart) / 1000000.0) 183 | * 1000; 184 | postElapsed = ((postEnd - postStart) 185 | + (postEnd - postStart) / 1000000.0) 186 | * 1000; 187 | 188 | ctx->inferTime += inferElapsed.count(); 189 | ctx->preTime += preElapsed.count(); 190 | ctx->postTime += postElapsed.count(); 191 | ctx->imageCount += cvmats.size(); 192 | } 193 | return outputs; 194 | } 195 | 196 | void YoloPluginCtxDeinit(YoloPluginCtx* ctx) 197 | { 198 | if (ctx->inferParams.printPerfInfo) 199 | { 200 | std::cout << "Yolo Plugin Perf Summary " << std::endl; 201 | std::cout << "Batch Size : " << ctx->batchSize << std::endl; 202 | std::cout << std::fixed << std::setprecision(4) 203 | << "PreProcess : " << ctx->preTime / ctx->imageCount 204 | << " ms Inference : " << ctx->inferTime / ctx->imageCount 205 | << " ms PostProcess : " << ctx->postTime / ctx->imageCount << " ms Total : " 206 | << (ctx->preTime + ctx->postTime + ctx->inferTime) / ctx->imageCount 207 | << " ms per Image" << std::endl; 208 | } 209 | 210 | delete ctx->inferenceNetwork; 211 | delete ctx; 212 | } 213 | -------------------------------------------------------------------------------- /modules/yoloplugin_lib.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | #ifndef __YOLOPLUGIN_LIB__ 26 | #define __YOLOPLUGIN_LIB__ 27 | 28 | //#include 29 | 30 | #include "calibrator.h" 31 | #include "trt_utils.h" 32 | #include "yolo.h" 33 | 34 | //#ifdef __cplusplus 35 | //extern "C" { 36 | //#endif 37 | 38 | #define MAX_OBJECTS_PER_FRAME 32 39 | typedef struct YoloPluginCtx YoloPluginCtx; 40 | typedef struct YoloPluginOutput YoloPluginOutput; 41 | // Init parameters structure as input, required for instantiating yoloplugin_lib 42 | typedef struct 43 | { 44 | // Width at which frame/object will be scaled 45 | int processingWidth; 46 | // height at which frame/object will be scaled 47 | int processingHeight; 48 | // Flag to indicate whether operating on crops of full frame 49 | int fullFrame; 50 | // Plugin config file 51 | std::string configFilePath; 52 | } YoloPluginInitParams; 53 | 54 | struct YoloPluginCtx 55 | { 56 | YoloPluginInitParams initParams; 57 | NetworkInfo networkInfo; 58 | InferParams inferParams; 59 | Yolo* inferenceNetwork; 60 | 61 | // perf vars 62 | float inferTime = 0.0, preTime = 0.0, postTime = 0.0; 63 | uint32_t batchSize = 0; 64 | uint64_t imageCount = 0; 65 | }; 66 | 67 | // Detected/Labelled object structure, stores bounding box info along with label 68 | typedef struct 69 | { 70 | int left; 71 | int top; 72 | int width; 73 | int height; 74 | char label[64]; 75 | } YoloPluginObject; 76 | 77 | // Output data returned after processing 78 | struct YoloPluginOutput 79 | { 80 | int numObjects; 81 | YoloPluginObject object[MAX_OBJECTS_PER_FRAME]; 82 | }; 83 | 84 | // Initialize library context 85 | YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize); 86 | 87 | // Dequeue processed output 88 | std::vector YoloPluginProcess(YoloPluginCtx* ctx, std::vector& cvmats); 89 | 90 | // Deinitialize library context 91 | void YoloPluginCtxDeinit(YoloPluginCtx* ctx); 92 | 93 | //#ifdef __cplusplus 94 | //} 95 | //#endif 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /modules/yolov2.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #include "yolov2.h" 27 | #include 28 | 29 | YoloV2::YoloV2(const NetworkInfo& networkInfo, 30 | const InferParams& inferParams) : 31 | Yolo(networkInfo, inferParams){} 32 | 33 | std::vector YoloV2::decodeTensor(const int imageIdx, const int imageH, const int imageW, 34 | const TensorInfo& tensor) 35 | { 36 | float scalingFactor 37 | = std::min(static_cast(m_InputW) / imageW, static_cast(m_InputH) / imageH); 38 | float xOffset = (m_InputW - scalingFactor * imageW) / 2; 39 | float yOffset = (m_InputH - scalingFactor * imageH) / 2; 40 | 41 | float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; 42 | 43 | std::vector binfo; 44 | for (uint32_t y = 0; y < tensor.gridSize; y++) 45 | { 46 | for (uint32_t x = 0; x < tensor.gridSize; x++) 47 | { 48 | for (uint32_t b = 0; b < tensor.numBBoxes; b++) 49 | { 50 | const float pw = tensor.anchors[2 * b]; 51 | const float ph = tensor.anchors[2 * b + 1]; 52 | const int numGridCells = tensor.gridSize * tensor.gridSize; 53 | const int bbindex = y * tensor.gridSize + x; 54 | const float bx 55 | = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; 56 | const float by 57 | = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; 58 | const float bw = pw 59 | * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]); 60 | const float bh = ph 61 | * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]); 62 | 63 | const float objectness 64 | = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; 65 | float maxProb = 0.0f; 66 | int maxIndex = -1; 67 | 68 | for (uint32_t i = 0; i < tensor.numClasses; i++) 69 | { 70 | float prob 71 | = detections[bbindex 72 | + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]; 73 | 74 | if (prob > maxProb) 75 | { 76 | maxProb = prob; 77 | maxIndex = i; 78 | } 79 | } 80 | 81 | maxProb = objectness * maxProb; 82 | 83 | if (maxProb > m_ProbThresh) 84 | { 85 | addBBoxProposal(bx, by, bw, bh, tensor.stride, scalingFactor, xOffset, yOffset, 86 | maxIndex, maxProb,imageW,imageH, binfo); 87 | } 88 | } 89 | } 90 | } 91 | return binfo; 92 | } 93 | -------------------------------------------------------------------------------- /modules/yolov2.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef _YOLO_V2_ 27 | #define _YOLO_V2_ 28 | 29 | #include "yolo.h" 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | class YoloV2 : public Yolo 36 | { 37 | public: 38 | YoloV2( const NetworkInfo& networkInfo, const InferParams& inferParams); 39 | 40 | private: 41 | std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, 42 | const TensorInfo& tensor) override; 43 | }; 44 | 45 | #endif // _YOLO_V2_ -------------------------------------------------------------------------------- /modules/yolov3.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | #include "yolov3.h" 26 | 27 | YoloV3::YoloV3(const NetworkInfo& networkInfo, 28 | const InferParams& inferParams) : 29 | Yolo( networkInfo, inferParams){} 30 | 31 | std::vector YoloV3::decodeTensor(const int imageIdx, 32 | const int imageH, 33 | const int imageW, 34 | const TensorInfo& tensor) 35 | { 36 | float scale_h = 1.f; 37 | float scale_w = 1.f; 38 | int xOffset = 0; 39 | int yOffset = 0; 40 | 41 | const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; 42 | 43 | std::vector binfo; 44 | for (uint32_t y = 0; y < tensor.grid_h; ++y) 45 | { 46 | for (uint32_t x = 0; x < tensor.grid_w; ++x) 47 | { 48 | for (uint32_t b = 0; b < tensor.numBBoxes; ++b) 49 | { 50 | const float pw = tensor.anchors[tensor.masks[b] * 2]; 51 | const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; 52 | 53 | const int numGridCells = tensor.grid_h * tensor.grid_w; 54 | const int bbindex = y * tensor.grid_w+ x; 55 | const float bx 56 | = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; 57 | 58 | const float by 59 | = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; 60 | const float bw 61 | = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]; 62 | const float bh 63 | = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]; 64 | 65 | const float objectness 66 | = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; 67 | 68 | float maxProb = 0.0f; 69 | int maxIndex = -1; 70 | 71 | for (uint32_t i = 0; i < tensor.numClasses; ++i) 72 | { 73 | float prob 74 | = (detections[bbindex 75 | + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); 76 | 77 | if (prob > maxProb) 78 | { 79 | maxProb = prob; 80 | maxIndex = i; 81 | } 82 | } 83 | maxProb = objectness * maxProb; 84 | 85 | if (maxProb > m_ProbThresh) 86 | { 87 | add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo); 88 | } 89 | } 90 | } 91 | } 92 | return binfo; 93 | } 94 | -------------------------------------------------------------------------------- /modules/yolov3.h: -------------------------------------------------------------------------------- 1 | /** 2 | MIT License 3 | 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef _YOLO_V3_ 27 | #define _YOLO_V3_ 28 | 29 | #include "yolo.h" 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | class YoloV3 : public Yolo 36 | { 37 | public: 38 | YoloV3(const NetworkInfo& networkInfo, const InferParams& inferParams); 39 | 40 | private: 41 | std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, 42 | const TensorInfo& tensor) override; 43 | }; 44 | 45 | #endif // _YOLO_V3_ -------------------------------------------------------------------------------- /modules/yolov4.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "yolov4.h" 3 | 4 | YoloV4::YoloV4( const NetworkInfo &network_info_, 5 | const InferParams &infer_params_) : 6 | Yolo(network_info_, infer_params_) {} 7 | 8 | std::vector YoloV4::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) 9 | { 10 | float scale_h = 1.f; 11 | float scale_w = 1.f; 12 | int xOffset = 0; 13 | int yOffset = 0; 14 | 15 | const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; 16 | 17 | std::vector binfo; 18 | float scale_x_y = 2.0; 19 | float offset = 0.5 * (scale_x_y-1.0); 20 | for (uint32_t y = 0; y < tensor.grid_h; ++y) 21 | { 22 | for (uint32_t x = 0; x < tensor.grid_w; ++x) 23 | { 24 | for (uint32_t b = 0; b < tensor.numBBoxes; ++b) 25 | { 26 | const int numGridCells = tensor.grid_h * tensor.grid_w; 27 | const int bbindex = y * tensor.grid_w + x; 28 | 29 | const float objectness 30 | = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; 31 | if (objectness < m_ProbThresh) { 32 | continue; 33 | } 34 | 35 | const float pw = tensor.anchors[tensor.masks[b] * 2]; 36 | const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; 37 | 38 | const float bx 39 | = x + scale_x_y * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)] - offset; 40 | const float by 41 | = y + scale_x_y * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)] - offset; 42 | const float bw 43 | = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)] * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)] * 4; 44 | const float bh 45 | = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)] * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)] * 4; 46 | 47 | 48 | float maxProb = 0.0f; 49 | int maxIndex = -1; 50 | 51 | for (uint32_t i = 0; i < tensor.numClasses; ++i) 52 | { 53 | float prob 54 | = (detections[bbindex 55 | + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); 56 | 57 | if (prob > maxProb) 58 | { 59 | maxProb = prob; 60 | maxIndex = i; 61 | } 62 | } 63 | maxProb = objectness * maxProb; 64 | 65 | if (maxProb > m_ProbThresh) 66 | { 67 | add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo); 68 | } 69 | } 70 | } 71 | } 72 | return binfo; 73 | } 74 | -------------------------------------------------------------------------------- /modules/yolov4.h: -------------------------------------------------------------------------------- 1 | #ifndef CLASS_YOLOV4_H_ 2 | #define CLASS_YOLOV4_H_ 3 | #include "yolo.h" 4 | class YoloV4 :public Yolo 5 | { 6 | public: 7 | YoloV4( 8 | const NetworkInfo &network_info_, 9 | const InferParams &infer_params_); 10 | private: 11 | std::vector decodeTensor(const int imageIdx, 12 | const int imageH, 13 | const int imageW, 14 | const TensorInfo& tensor) override; 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /modules/yolov5.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "yolov5.h" 3 | 4 | 5 | YoloV5::YoloV5( 6 | const NetworkInfo &network_info_, 7 | const InferParams &infer_params_) : 8 | Yolo( network_info_, infer_params_) {} 9 | 10 | std::vector YoloV5::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) 11 | { 12 | float scale_h = 1.f; 13 | float scale_w = 1.f; 14 | int xOffset = 0; 15 | int yOffset = 0; 16 | calcuate_letterbox_message(m_InputH, m_InputW, imageH, imageW, scale_h, scale_w, xOffset, yOffset); 17 | const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; 18 | 19 | std::vector binfo; 20 | for (uint32_t y = 0; y < tensor.grid_h; ++y) 21 | { 22 | for (uint32_t x = 0; x < tensor.grid_w; ++x) 23 | { 24 | for (uint32_t b = 0; b < tensor.numBBoxes; ++b) 25 | { 26 | const float pw = tensor.anchors[tensor.masks[b] * 2]; 27 | const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; 28 | 29 | const int numGridCells = tensor.grid_h * tensor.grid_w; 30 | const int bbindex = y * tensor.grid_w+ x; 31 | const float bx 32 | = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; 33 | 34 | const float by 35 | = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; 36 | const float bw 37 | = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]; 38 | const float bh 39 | = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]; 40 | 41 | const float objectness 42 | = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; 43 | 44 | float maxProb = 0.0f; 45 | int maxIndex = -1; 46 | 47 | for (uint32_t i = 0; i < tensor.numClasses; ++i) 48 | { 49 | float prob 50 | = (detections[bbindex 51 | + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); 52 | 53 | if (prob > maxProb) 54 | { 55 | maxProb = prob; 56 | maxIndex = i; 57 | } 58 | } 59 | maxProb = objectness * maxProb; 60 | 61 | if (maxProb > m_ProbThresh) 62 | { 63 | add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w,xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo); 64 | } 65 | } 66 | } 67 | } 68 | return binfo; 69 | } 70 | -------------------------------------------------------------------------------- /modules/yolov5.h: -------------------------------------------------------------------------------- 1 | #ifndef CLASS_YOLOV5_H_ 2 | #define CLASS_YOLOV5_H_ 3 | #include "yolo.h" 4 | class YoloV5 :public Yolo 5 | { 6 | public: 7 | YoloV5( 8 | const NetworkInfo &network_info_, 9 | const InferParams &infer_params_); 10 | 11 | BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh, 12 | const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH) 13 | { 14 | BBox b; 15 | // Restore coordinates to network input resolution 16 | float x = bx * stride_w_; 17 | float y = by * stride_h_; 18 | 19 | b.x1 = x - bw / 2; 20 | b.x2 = x + bw / 2; 21 | 22 | b.y1 = y - bh / 2; 23 | b.y2 = y + bh / 2; 24 | 25 | b.x1 = clamp(b.x1, 0, netW); 26 | b.x2 = clamp(b.x2, 0, netW); 27 | b.y1 = clamp(b.y1, 0, netH); 28 | b.y2 = clamp(b.y2, 0, netH); 29 | 30 | return b; 31 | } 32 | 33 | 34 | private: 35 | std::vector decodeTensor(const int imageIdx, 36 | const int imageH, 37 | const int imageW, 38 | const TensorInfo& tensor) override; 39 | }; 40 | 41 | #endif -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | lightnet_trt 5 | 0.0.0 6 | The ROS 2 package for lightNet-TRT 7 | Koji Minoda 8 | Apache License 2.0 9 | Koji Minoda 10 | 11 | ament_cmake_auto 12 | autoware_cmake 13 | 14 | sensor_msgs 15 | rclcpp 16 | cv_bridge 17 | image_transport 18 | rclcpp_components 19 | tier4_autoware_utils 20 | tf2_eigen 21 | 22 | ament_cmake_ros 23 | ament_lint_auto 24 | autoware_lint_common 25 | 26 | 27 | ament_cmake 28 | 29 | 30 | -------------------------------------------------------------------------------- /samples/parallel_executor.cpp: -------------------------------------------------------------------------------- 1 | #include "class_timer.hpp" 2 | #include "class_detector.h" 3 | #include "yolo_config_parser.h" 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | namespace fs = boost::filesystem; 14 | 15 | template 16 | std::string format(const std::string& fmt, Args ... args ) 17 | { 18 | size_t len = std::snprintf( nullptr, 0, fmt.c_str(), args ... ); 19 | std::vector buf(len + 1); 20 | std::snprintf(&buf[0], len + 1, fmt.c_str(), args ... ); 21 | return std::string(&buf[0], &buf[0] + len); 22 | } 23 | 24 | void 25 | write_prediction(std::string dumpPath, std::string filename, std::vector names, std::vector objects, int width, int height) 26 | { 27 | int pos = filename.find_last_of("."); 28 | std::string body = filename.substr(0, pos); 29 | std::string dstName = body + ".txt"; 30 | std::ofstream writing_file; 31 | fs::path p = dumpPath; 32 | fs::create_directory(p); 33 | p.append(dstName); 34 | writing_file.open(p.string(), std::ios::out); 35 | for (const auto & object : objects) { 36 | const auto left = object.rect.x; 37 | const auto top = object.rect.y; 38 | const auto right = std::clamp(left + object.rect.width, 0, width); 39 | const auto bottom = std::clamp(top + object.rect.height, 0, height); 40 | std::string writing_text = format("%s %f %d %d %d %d", names[object.id].c_str(), object.prob, left, top, right, bottom); 41 | writing_file << writing_text << std::endl; 42 | } 43 | writing_file.close(); 44 | } 45 | 46 | 47 | int main(int argc, char** argv) 48 | { 49 | gflags::SetUsageMessage( 50 | "Usage : trt-yolo-app --flagfile= --=value ..."); 51 | 52 | // parse config params 53 | yoloConfigParserInit(argc, argv); 54 | NetworkInfo yoloInfo = getYoloNetworkInfo(); 55 | NetworkInfo yoloInfo1 = getYoloNetworkInfo1(); 56 | NetworkInfo yoloInfo2 = getYoloNetworkInfo2(); 57 | std::string directory = getDirectoryPath(); 58 | std::string videoPath = getVideoPath(); 59 | bool flg_save = getSaveDetections(); 60 | std::string save_path = getSaveDetectionsPath(); 61 | bool dont_show = get_dont_show_flg(); 62 | const std::string dumpPath = get_dump_path(); 63 | const bool cuda = get_cuda_flg(); 64 | Config config; 65 | config.net_type = YOLOV4; 66 | config.file_model_cfg = yoloInfo.configFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse.cfg"; 67 | config.file_model_weights = yoloInfo.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights"; 68 | config.calibration_image_list_file_txt = "../configs/calibration_images.txt"; 69 | if (yoloInfo.precision == "kHALF") { 70 | config.inference_precison = FP16; 71 | }else if (yoloInfo.precision == "kINT8") { 72 | config.inference_precison = INT8; 73 | } else { 74 | config.inference_precison = FP32; 75 | } 76 | config.detect_thresh = (float)get_score_thresh(); 77 | config.batch = yoloInfo.batch; 78 | config.width = yoloInfo.width; 79 | config.height= yoloInfo.height; 80 | config.dla = yoloInfo.dla; 81 | std::unique_ptr detector(new Detector()); 82 | detector->init(config); 83 | /* 84 | Config config1; 85 | config1.net_type = YOLOV4; 86 | config1.file_model_cfg = yoloInfo1.configFilePath; 87 | config1.file_model_weights = yoloInfo1.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights"; 88 | config1.calibration_image_list_file_txt = "../configs/calibration_images.txt"; 89 | if (yoloInfo1.precision == "kHALF") { 90 | config1.inference_precison = FP16; 91 | }else if (yoloInfo1.precision == "kINT8") { 92 | config1.inference_precison = INT8; 93 | } else { 94 | config1.inference_precison = FP32; 95 | } 96 | config1.detect_thresh = (float)get_score_thresh(); 97 | config1.batch = yoloInfo1.batch; 98 | config1.width = yoloInfo1.width; 99 | config1.height= yoloInfo1.height; 100 | config1.dla = yoloInfo1.dla; 101 | std::unique_ptr detector1(new Detector()); 102 | detector1->init(config1); 103 | */ 104 | 105 | Config config2; 106 | config2.net_type = YOLOV4; 107 | config2.file_model_cfg = yoloInfo2.configFilePath; 108 | config2.file_model_weights = yoloInfo2.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights"; 109 | config2.calibration_image_list_file_txt = "../configs/calibration_images.txt"; 110 | if (yoloInfo2.precision == "kHALF") { 111 | config2.inference_precison = FP16; 112 | }else if (yoloInfo2.precision == "kINT8") { 113 | config2.inference_precison = INT8; 114 | } else { 115 | config2.inference_precison = FP32; 116 | } 117 | config2.detect_thresh = (float)get_score_thresh(); 118 | config2.batch = yoloInfo2.batch; 119 | config2.width = yoloInfo2.width; 120 | config2.height= yoloInfo2.height; 121 | config2.dla = yoloInfo2.dla; 122 | std::unique_ptr detector2(new Detector()); 123 | detector2->init(config2); 124 | 125 | std::vector batch_res; 126 | std::vector batch_res1; 127 | std::vector batch_res2; 128 | Timer timer; 129 | if (flg_save) { 130 | fs::create_directory(save_path); 131 | fs::path p = save_path; 132 | p.append("detections"); 133 | fs::create_directory(p); 134 | } 135 | 136 | 137 | if (directory != "") { 138 | for (const auto & file : std::filesystem::directory_iterator(directory)) { 139 | std::cout << file.path() << std::endl; 140 | cv::Mat src = cv::imread(file.path(), cv::IMREAD_UNCHANGED); 141 | std::vector batch_img; 142 | // batch_img.push_back(src); 143 | for (int b = 0; b < config.batch; b++) { 144 | batch_img.push_back(src); 145 | } 146 | timer.reset(); 147 | #pragma omp parallel sections 148 | { 149 | #pragma omp section 150 | { 151 | detector->detect(batch_img, batch_res, cuda); 152 | } 153 | #pragma omp section 154 | { 155 | //detector1->detect(batch_img, batch_res, cuda); 156 | } 157 | #pragma omp section 158 | { 159 | detector2->detect(batch_img, batch_res2, cuda); 160 | } 161 | } 162 | detector->segment(batch_img); 163 | //detector1->segment(batch_img); 164 | detector2->segment(batch_img); 165 | timer.out("detect"); 166 | 167 | if (dumpPath != "not-specified") { 168 | fs::path p (file.path()); 169 | std::string filename = p.filename().string(); 170 | std::vector names = get_names(); 171 | write_prediction(dumpPath, filename, names, batch_res[0], src.cols, src.rows); 172 | } 173 | //disp 174 | if (dont_show == true) { 175 | continue; 176 | } 177 | for (int i=0;idraw_BBox(batch_img[i], r); 180 | } 181 | /* 182 | for (const auto &r : batch_res1[i]) { 183 | detector1->draw_BBox(batch_img[i], r); 184 | } 185 | */ 186 | for (const auto &r : batch_res2[i]) { 187 | detector2->draw_BBox(batch_img[i], r); 188 | } 189 | cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); 190 | cv::imshow("image"+std::to_string(i), batch_img[i]); 191 | int k = cv::waitKey(0); 192 | if (k == 32) { 193 | fs::path p (file.path()); 194 | std::string name = p.filename().string(); 195 | std::cout << "Save... " << name << std::endl; 196 | detector->save_image(src, "log/", name); 197 | cv::waitKey(0); 198 | } 199 | } 200 | } 201 | } else if (videoPath != ""){ 202 | std::cout << videoPath << std::endl; 203 | cv::VideoCapture video; 204 | video.open(videoPath); 205 | cv::Mat frame; 206 | int count = 0; 207 | while (1) { 208 | video >> frame; 209 | if (frame.empty() == true) break; 210 | 211 | std::vector batch_img; 212 | batch_img.push_back(frame); 213 | timer.reset(); 214 | #pragma omp parallel sections 215 | { 216 | #pragma omp section 217 | { 218 | detector->detect(batch_img, batch_res, cuda); 219 | } 220 | #pragma omp section 221 | { 222 | //detector1->detect(batch_img, batch_res1, cuda); 223 | } 224 | #pragma omp section 225 | { 226 | detector2->detect(batch_img, batch_res2, cuda); 227 | } 228 | } 229 | detector->segment(batch_img); 230 | // detector1->segment(batch_img); 231 | detector2->segment(batch_img); 232 | timer.out("detect"); 233 | 234 | if (dont_show == true) { 235 | continue; 236 | } 237 | //disp 238 | for (int i=0;idraw_BBox(batch_img[i], r); 241 | } 242 | /* 243 | for (const auto &r : batch_res1[i]) { 244 | detector1->draw_BBox(batch_img[i], r); 245 | } 246 | */ 247 | for (const auto &r : batch_res2[i]) { 248 | detector2->draw_BBox(batch_img[i], r); 249 | } 250 | //cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); 251 | cv::namedWindow("image" + std::to_string(i)); 252 | cv::imshow("image"+std::to_string(i), batch_img[i]); 253 | 254 | if (flg_save) { 255 | std::ostringstream sout; 256 | sout << std::setfill('0') << std::setw(6) << count; 257 | std::string name = "frame_" + sout.str() + ".png"; 258 | fs::path p = save_path; 259 | p.append("detections"); 260 | detector->save_image(batch_img[i], p.string(), name); 261 | } 262 | if (i > 0) { 263 | count++; 264 | } 265 | } 266 | count++; 267 | if (cv::waitKey(1) == 'q') break; 268 | } 269 | } 270 | if (get_prof_flg()) { 271 | detector->dump_profiling(); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /samples/sample_detector.cpp: -------------------------------------------------------------------------------- 1 | #include "class_timer.hpp" 2 | #include "class_detector.h" 3 | #include "yolo_config_parser.h" 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace fs = boost::filesystem; 14 | 15 | template 16 | std::string format(const std::string& fmt, Args ... args ) 17 | { 18 | size_t len = std::snprintf( nullptr, 0, fmt.c_str(), args ... ); 19 | std::vector buf(len + 1); 20 | std::snprintf(&buf[0], len + 1, fmt.c_str(), args ... ); 21 | return std::string(&buf[0], &buf[0] + len); 22 | } 23 | 24 | void 25 | write_prediction(std::string dumpPath, std::string filename, std::vector names, std::vector objects, int width, int height) 26 | { 27 | int pos = filename.find_last_of("."); 28 | std::string body = filename.substr(0, pos); 29 | std::string dstName = body + ".txt"; 30 | std::ofstream writing_file; 31 | fs::path p = dumpPath; 32 | fs::create_directory(p); 33 | p.append(dstName); 34 | writing_file.open(p.string(), std::ios::out); 35 | for (const auto & object : objects) { 36 | const auto left = object.rect.x; 37 | const auto top = object.rect.y; 38 | const auto right = std::clamp(left + object.rect.width, 0, width); 39 | const auto bottom = std::clamp(top + object.rect.height, 0, height); 40 | std::string writing_text = format("%s %f %d %d %d %d", names[object.id].c_str(), object.prob, left, top, right, bottom); 41 | writing_file << writing_text << std::endl; 42 | } 43 | writing_file.close(); 44 | } 45 | 46 | 47 | void 48 | write_label(std::string outputPath, std::string filename, std::vector names, std::vector objects, int width, int height) 49 | { 50 | int pos = filename.find_last_of("."); 51 | std::string body = filename.substr(0, pos); 52 | std::string dstName = body + ".txt"; 53 | std::ofstream writing_file; 54 | fs::path p = outputPath; 55 | fs::create_directory(p); 56 | p.append(dstName); 57 | writing_file.open(p.string(), std::ios::out); 58 | std::cout << "Write" << p.string() << std::endl; 59 | for (const auto & object : objects) { 60 | const auto left = object.rect.x; 61 | const auto top = object.rect.y; 62 | const auto right = std::clamp(left + object.rect.width, 0, width); 63 | const auto bottom = std::clamp(top + object.rect.height, 0, height); 64 | double x = (left + right) / 2.0 / (double)width; 65 | double y = (top + bottom) / 2.0 / (double)height; 66 | double w = (right - left) / (double)width; 67 | double h = (bottom - top) / (double)height; 68 | std::string writing_text = format("%d %f %f %f %f", object.id, x, y, w, h); 69 | writing_file << writing_text << std::endl; 70 | } 71 | writing_file.close(); 72 | } 73 | 74 | 75 | int main(int argc, char** argv) 76 | { 77 | gflags::SetUsageMessage( 78 | "Usage : trt-yolo-app --flagfile= --=value ..."); 79 | 80 | // parse config params 81 | yoloConfigParserInit(argc, argv); 82 | NetworkInfo yoloInfo = getYoloNetworkInfo(); 83 | std::string directory = getDirectoryPath(); 84 | std::string videoPath = getVideoPath(); 85 | int cam_id = getCameraID(); 86 | bool flg_save = getSaveDetections(); 87 | std::string save_path = getSaveDetectionsPath(); 88 | bool dont_show = get_dont_show_flg(); 89 | const std::string dumpPath = get_dump_path(); 90 | const bool cuda = get_cuda_flg(); 91 | const std::string target = get_target_label(); 92 | const std::string outputPath = get_output_path(); 93 | Config config; 94 | config.net_type = YOLOV4; 95 | config.file_model_cfg = yoloInfo.configFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse.cfg"; 96 | config.file_model_weights = yoloInfo.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights"; 97 | config.calibration_image_list_file_txt = "../configs/calibration_images.txt"; 98 | if (yoloInfo.precision == "kHALF") { 99 | config.inference_precison = FP16; 100 | }else if (yoloInfo.precision == "kINT8") { 101 | config.inference_precison = INT8; 102 | } else { 103 | config.inference_precison = FP32; 104 | } 105 | config.detect_thresh = (float)get_score_thresh(); 106 | config.batch = yoloInfo.batch; 107 | config.width = yoloInfo.width; 108 | config.height= yoloInfo.height; 109 | config.dla = yoloInfo.dla; 110 | std::unique_ptr detector(new Detector()); 111 | detector->init(config); 112 | std::vector batch_res; 113 | if (flg_save) { 114 | fs::create_directory(save_path); 115 | fs::path p = save_path; 116 | p.append("detections"); 117 | fs::create_directory(p); 118 | } 119 | 120 | 121 | if (directory != "") { 122 | for (const auto & file : std::filesystem::directory_iterator(directory)) { 123 | std::cout << file.path() << std::endl; 124 | // cv::Mat src = cv::imread(file.path(), cv::IMREAD_UNCHANGED); 125 | cv::Mat src = cv::imread(file.path());//, cv::IMREAD_COLOR); 126 | fs::path p (file.path()); 127 | std::string name = p.filename().string(); 128 | 129 | std::vector batch_img; 130 | // batch_img.push_back(src); 131 | for (int b = 0; b < config.batch; b++) { 132 | batch_img.push_back(src); 133 | } 134 | detector->detect(batch_img, batch_res, cuda); 135 | detector->segment(batch_img, name); 136 | detector->regress(batch_img, batch_res, name); 137 | 138 | if (dumpPath != "not-specified") { 139 | fs::path p (file.path()); 140 | std::string filename = p.filename().string(); 141 | std::vector names = get_names(); 142 | write_prediction(dumpPath, filename, names, batch_res[0], src.cols, src.rows); 143 | } 144 | if (outputPath != "not-specified") { 145 | fs::path p (file.path()); 146 | std::string filename = p.filename().string(); 147 | std::vector names = get_names(); 148 | write_label(outputPath, filename, names, batch_res[0], src.cols, src.rows); 149 | } 150 | if (target != "") { 151 | std::vector names = get_names(); 152 | for (int i=0;i save into " << p.string() << std::endl; 159 | detector->save_image(batch_img[i], p.string(), name); 160 | } 161 | } 162 | } 163 | } 164 | //disp 165 | for (int i=0;idraw_BBox(batch_img[i], r); 169 | } 170 | if (dont_show == true) { 171 | continue; 172 | } 173 | cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); 174 | cv::imshow("image"+std::to_string(i), batch_img[i]); 175 | int k = cv::waitKey(0); 176 | if (k == 32) { 177 | cv::Mat tmp = cv::imread(file.path(), cv::IMREAD_UNCHANGED); 178 | std::cout << "Save... " << name << std::endl; 179 | detector->save_image(tmp, "log/", name); 180 | cv::waitKey(0); 181 | } 182 | } 183 | if (flg_save) { 184 | fs::path p = save_path; 185 | p.append("detections"); 186 | detector->save_image(batch_img[0], p.string(), name); 187 | } 188 | } 189 | } else if (videoPath != "" || cam_id != -1){ 190 | std::cout << videoPath << std::endl; 191 | cv::VideoCapture video; 192 | if (cam_id != -1) { 193 | video.open(cam_id); 194 | } else { 195 | video.open(videoPath); 196 | } 197 | cv::Mat frame; 198 | int count = 0; 199 | cv::namedWindow("image" + std::to_string(0), cv::WINDOW_AUTOSIZE); 200 | while (1) { 201 | video >> frame; 202 | if (frame.empty() == true) break; 203 | 204 | std::vector batch_img; 205 | batch_img.push_back(frame); 206 | 207 | if (cam_id != -1 && flg_save) { 208 | std::ostringstream sout; 209 | sout << std::setfill('0') << std::setw(6) << count; 210 | std::string name = "frame_" + sout.str() + ".jpg"; 211 | fs::path p = save_path; 212 | p.append("JPEGImages"); 213 | fs::create_directory(p); 214 | std::cout << "Save... " << name << std::endl; 215 | detector->save_image(frame, p.string(), name); 216 | } 217 | 218 | detector->detect(batch_img, batch_res, cuda); 219 | detector->segment(batch_img, ""); 220 | detector->regress(batch_img, batch_res, ""); 221 | 222 | //disp 223 | for (int i=0;idraw_BBox(batch_img[i], r); 227 | } 228 | if (!dont_show) { 229 | //cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); 230 | //cv::namedWindow("image" + std::to_string(i), cv::WINDOW_KEEPRATIO); 231 | //cv::ResizeWindow("image" + std::to_string(i), 960, 640); 232 | cv::imshow("image"+std::to_string(i), batch_img[i]); 233 | } 234 | if (flg_save) { 235 | std::ostringstream sout; 236 | sout << std::setfill('0') << std::setw(6) << count; 237 | std::string name = "frame_" + sout.str() + ".jpg"; 238 | fs::path p = save_path; 239 | p.append("detections"); 240 | detector->save_image(batch_img[i], p.string(), name); 241 | } 242 | if (i > 0) { 243 | count++; 244 | } 245 | } 246 | count++; 247 | if (cv::waitKey(1) == 'q') break; 248 | } 249 | } 250 | if (get_prof_flg()) { 251 | detector->dump_profiling(); 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /src/lightnet_trt_core.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "lightnet_trt_core.hpp" 16 | 17 | LightNetTensorRT::LightNetTensorRT(const ::Config &config) 18 | { 19 | const bool cuda = get_cuda_flg(); 20 | 21 | detector_ = std::make_unique<::Detector>(); 22 | detector_->init(config); 23 | } 24 | 25 | void LightNetTensorRT::doInference(const std::vector & images, std::vector & masks) 26 | { 27 | std::vector batch_res; 28 | detector_->detect(images, batch_res, cuda); 29 | detector_->get_mask(masks); 30 | } 31 | -------------------------------------------------------------------------------- /src/lightnet_trt_core.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_ 16 | #define LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_ 17 | 18 | #include "class_detector.h" 19 | #include "yolo_config_parser.h" 20 | 21 | #include 22 | 23 | #include 24 | 25 | class LightNetTensorRT 26 | { 27 | public: 28 | LightNetTensorRT(const ::Config &config); 29 | 30 | void doInference(const std::vector & images, std::vector & masks); 31 | 32 | std::unique_ptr detector_; 33 | const bool cuda = false; 34 | }; 35 | #endif // LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_ 36 | -------------------------------------------------------------------------------- /src/lightnet_trt_node.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "lightnet_trt_node.hpp" 16 | 17 | LightNetTensorRTNode::LightNetTensorRTNode(const rclcpp::NodeOptions &node_options) 18 | : Node("tensorrt_lightnet", node_options), 19 | ipm_projector_(nullptr), 20 | base_frame_("base_link") 21 | { 22 | transform_listener_ = std::make_shared(this); 23 | 24 | // Initialize a detector 25 | config_ = loadConfig(); 26 | 27 | RCLCPP_INFO(this->get_logger(), "Start loading YOLO"); 28 | RCLCPP_INFO(this->get_logger(), "Model Config: %s", config_.file_model_cfg.c_str()); 29 | RCLCPP_INFO(this->get_logger(), "Model Weights: %s", config_.file_model_weights.c_str()); 30 | RCLCPP_INFO(this->get_logger(), "Input size: (%d, %d)", config_.width, config_.height); 31 | lightnet_trt_ = std::make_unique(config_); 32 | RCLCPP_INFO(this->get_logger(), "Finished loading YOLO"); 33 | 34 | image_sub_ = image_transport::create_subscription( 35 | this, "~/in/image", [this](const sensor_msgs::msg::Image::ConstSharedPtr msg) { onImage(msg); }, "raw", 36 | rmw_qos_profile_sensor_data); 37 | camera_info_sub_ = this->create_subscription( 38 | "~/in/camera_info", rclcpp::SensorDataQoS(), 39 | [this](const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg) { onCameraInfo(msg); }); 40 | 41 | image_pub_ = image_transport::create_publisher(this, "~/out/image"); 42 | image_projected_pub_ = image_transport::create_publisher(this, "~/out/image_projected"); 43 | } 44 | 45 | ::Config LightNetTensorRTNode::loadConfig() 46 | { 47 | const std::string model_cfg = declare_parameter("model_cfg"); 48 | const std::string model_weights = declare_parameter("model_weights"); 49 | const int width = declare_parameter("width"); 50 | const int height = declare_parameter("height"); 51 | 52 | // Initialize a detector 53 | ::Config config; 54 | 55 | config.net_type = YOLOV4; 56 | config.file_model_cfg = model_cfg; 57 | config.file_model_weights = model_weights; 58 | config.inference_precison = FP32; 59 | config.batch = 1; 60 | config.width = width; 61 | config.height = height; 62 | config.dla = -1; 63 | 64 | return config; 65 | } 66 | 67 | void LightNetTensorRTNode::onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg) 68 | { 69 | if (ipm_projector_ != nullptr) return; 70 | const Eigen::Matrix3f intrinsic = get_intrinsic_matrix(*msg); 71 | const Eigen::Matrix3f intrinsic_resized = resize_intrinsic_matrix(intrinsic, config_.width, config_.height); 72 | 73 | const std::string camera_frame = msg->header.frame_id; 74 | geometry_msgs::msg::TransformStamped::ConstSharedPtr tf_base2cam_ptr = 75 | transform_listener_->getLatestTransform(base_frame_, camera_frame); 76 | if (!tf_base2cam_ptr) return; 77 | 78 | const Eigen::Matrix4f extrinsic = tf2::transformToEigen(tf_base2cam_ptr->transform).matrix().cast(); 79 | ipm_projector_ = std::make_unique(intrinsic_resized, extrinsic); 80 | return; 81 | } 82 | 83 | void LightNetTensorRTNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) 84 | { 85 | RCLCPP_INFO(this->get_logger(), "Image received"); 86 | cv_bridge::CvImagePtr in_image_ptr; 87 | in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); 88 | 89 | std::vector masks; 90 | lightnet_trt_->doInference({in_image_ptr->image}, masks); 91 | RCLCPP_INFO(this->get_logger(), "Inference succeeded"); 92 | 93 | // Publish result 94 | cv_bridge::CvImage out_image; 95 | out_image.header = msg->header; 96 | out_image.encoding = sensor_msgs::image_encodings::BGR8; 97 | out_image.image = masks[0]; 98 | image_pub_.publish(out_image.toImageMsg()); 99 | 100 | // Post processing 101 | if (!ipm_projector_) 102 | { 103 | RCLCPP_WARN(this->get_logger(), "IPM projector is not initialized."); 104 | return; 105 | } 106 | RCLCPP_WARN(this->get_logger(), "IPM projector running."); 107 | 108 | cv::Mat projected_mask; 109 | ipm_projector_->run(masks[0], projected_mask); 110 | 111 | // Publish result 112 | cv_bridge::CvImage out_projected_image; 113 | out_projected_image.header = msg->header; 114 | out_projected_image.encoding = sensor_msgs::image_encodings::BGR8; 115 | out_projected_image.image = projected_mask; 116 | image_projected_pub_.publish(out_projected_image.toImageMsg()); 117 | } 118 | 119 | #include "rclcpp_components/register_node_macro.hpp" 120 | RCLCPP_COMPONENTS_REGISTER_NODE(LightNetTensorRTNode) 121 | -------------------------------------------------------------------------------- /src/lightnet_trt_node.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ 16 | #define LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ 17 | 18 | #include "lightnet_trt_core.hpp" 19 | #include "utils.hpp" 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | class LightNetTensorRTNode : public rclcpp::Node 32 | { 33 | public: 34 | explicit LightNetTensorRTNode(const rclcpp::NodeOptions &node_options); 35 | 36 | private: 37 | void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); 38 | void onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg); 39 | ::Config loadConfig(); 40 | 41 | // ROS related variables 42 | image_transport::Subscriber image_sub_; 43 | rclcpp::Subscription::SharedPtr camera_info_sub_; 44 | image_transport::Publisher image_pub_; 45 | image_transport::Publisher image_projected_pub_; 46 | std::shared_ptr transform_listener_; 47 | 48 | // Tools 49 | std::unique_ptr lightnet_trt_; 50 | std::unique_ptr ipm_projector_; 51 | 52 | // Misc 53 | bool received_camera_info_; 54 | const std::string base_frame_; 55 | ::Config config_; 56 | }; 57 | #endif // LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ 58 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "lightnet_trt_node.hpp" 16 | 17 | #include 18 | 19 | int main(int argc, char ** argv) 20 | { 21 | rclcpp::init(argc, argv); 22 | rclcpp::NodeOptions node_options; 23 | auto node = std::make_shared(node_options); 24 | 25 | rclcpp::spin(node); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "utils.hpp" 16 | 17 | Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info) 18 | { 19 | Eigen::Matrix3f intrinsic_matrix; 20 | intrinsic_matrix << camera_info.k[0], camera_info.k[1], camera_info.k[2], 21 | camera_info.k[3], camera_info.k[4], camera_info.k[5], 22 | camera_info.k[6], camera_info.k[7], camera_info.k[8]; 23 | return intrinsic_matrix; 24 | } 25 | 26 | Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height) 27 | { 28 | Eigen::Matrix3f resized_intrinsic; 29 | const float width_ratio = resized_width / 2.0 / intrinsic(0, 2); 30 | const float height_ratio = resized_height / 2.0 / intrinsic(1, 2); 31 | resized_intrinsic << intrinsic(0, 0) * width_ratio, 0, resized_width / 2, 32 | 0, intrinsic(1, 1) * height_ratio, resized_height / 2, 33 | 0, 0, 1; 34 | return resized_intrinsic; 35 | } 36 | 37 | // TODO: Implement this in a smarter way 38 | IPM::IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic) 39 | { 40 | // Define points on the ground plane in the world coordinate system 41 | // 50 x 20 m rectangle 42 | const Eigen::Vector4f dst_p1{55, -10, 0, 1}; 43 | const Eigen::Vector4f dst_p2{55, 10, 0, 1}; 44 | const Eigen::Vector4f dst_p3{5, 10, 0, 1}; 45 | const Eigen::Vector4f dst_p4{5, -10, 0, 1}; 46 | 47 | // Convert the points from the world coordinate system to the camera coordinate system 48 | const Eigen::Matrix4f extrinsic_inv = extrinsic.inverse(); 49 | Eigen::Vector3f src_p1 = (extrinsic_inv * dst_p1).head<3>(); 50 | Eigen::Vector3f src_p2 = (extrinsic_inv * dst_p2).head<3>(); 51 | Eigen::Vector3f src_p3 = (extrinsic_inv * dst_p3).head<3>(); 52 | Eigen::Vector3f src_p4 = (extrinsic_inv * dst_p4).head<3>(); 53 | 54 | // Project the points from 3D camera coordinate system to 2D image plane 55 | src_p1 = intrinsic * src_p1; src_p1 /= src_p1(2); 56 | src_p2 = intrinsic * src_p2; src_p2 /= src_p2(2); 57 | src_p3 = intrinsic * src_p3; src_p3 /= src_p3(2); 58 | src_p4 = intrinsic * src_p4; src_p4 /= src_p4(2); 59 | 60 | // Create source and destination points for cv::getPerspectiveTransform 61 | const std::vector src_pts = { 62 | cv::Point2f(src_p1(0), src_p1(1)), 63 | cv::Point2f(src_p2(0), src_p2(1)), 64 | cv::Point2f(src_p3(0), src_p3(1)), 65 | cv::Point2f(src_p4(0), src_p4(1)) 66 | }; 67 | const std::vector dst_pts = { 68 | cv::Point2f(dst_p3(1) * 12 + 320, dst_p3(0) * 12), 69 | cv::Point2f(dst_p4(1) * 12 + 320, dst_p4(0) * 12), 70 | cv::Point2f(dst_p1(1) * 12 + 320, dst_p1(0) * 12), 71 | cv::Point2f(dst_p2(1) * 12 + 320, dst_p2(0) * 12) 72 | }; 73 | 74 | perspective_transform_ = cv::getPerspectiveTransform(src_pts, dst_pts); 75 | } 76 | 77 | void IPM::run(const cv::Mat & img, cv::Mat & output_img) 78 | { 79 | cv::warpPerspective(img, output_img, perspective_transform_, cv::Size(640, 640)); 80 | } -------------------------------------------------------------------------------- /src/utils.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2023 TIER IV 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef LIGHTNET_TRT__UTILS_HPP_ 16 | #define LIGHTNET_TRT__UTILS_HPP_ 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include "sensor_msgs/msg/camera_info.hpp" 24 | #include "geometry_msgs/msg/transform.hpp" 25 | 26 | Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info); 27 | Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height); 28 | 29 | class IPM 30 | { 31 | public: 32 | IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic); 33 | void run(const cv::Mat & img, cv::Mat & output_img); 34 | private: 35 | cv::Mat perspective_transform_; 36 | }; 37 | 38 | #endif // LIGHTNET_TRT__UTILS_HPP_ 39 | --------------------------------------------------------------------------------