├── CMakeLists.txt ├── LICENSE ├── README.md ├── asset ├── Bench_YOLO_V11.JPG ├── Yolo_v11_cpp_tenosrrt.PNG ├── output.gif └── output.mp4 ├── include ├── YOLOv11.h ├── common.h ├── cuda_utils.h ├── logging.h ├── macros.h └── preprocess.h ├── main.cpp └── src ├── YOLOv11.cpp └── preprocess.cu /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | 3 | # Project declaration with C++ and CUDA support 4 | project(YOLOv11TRT LANGUAGES CXX CUDA) 5 | 6 | # Set C++ standard to C++17 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_EXTENSIONS OFF) 10 | 11 | # Define the path to TensorRT installation 12 | set(TENSORRT_PATH "F:/Program Files/TensorRT-8.6.1.6") # Update this to the actual path for TensorRT 13 | 14 | # Define the path to OpenCV installation 15 | 16 | # Allow overriding TensorRT and OpenCV paths via command line 17 | # e.g., cmake -DTENSORRT_PATH="path/to/TensorRT" -DOpenCV_DIR="path/to/OpenCV" .. 18 | option(TENSORRT_PATH_OPTION "Path to TensorRT installation" ${TENSORRT_PATH}) 19 | set(TENSORRT_PATH ${TENSORRT_PATH_OPTION} CACHE PATH "Path to TensorRT installation") 20 | 21 | # Find OpenCV 22 | find_package(OpenCV REQUIRED) 23 | if(NOT OpenCV_FOUND) 24 | message(FATAL_ERROR "OpenCV not found. Please install OpenCV or set OpenCV_DIR.") 25 | endif() 26 | 27 | # Find CUDA 28 | find_package(CUDA REQUIRED) 29 | if(NOT CUDA_FOUND) 30 | message(FATAL_ERROR "CUDA not found. Please install the CUDA Toolkit.") 31 | endif() 32 | 33 | # Include directories for TensorRT 34 | include_directories(${TENSORRT_PATH}/include) 35 | 36 | # Include directory for your project 37 | include_directories(${CMAKE_SOURCE_DIR}/include) 38 | 39 | # Define source files (including CUDA sources) 40 | set(SOURCES 41 | main.cpp 42 | src/yolov11.cpp 43 | src/preprocess.cu 44 | ) 45 | 46 | # Create executable (CMake handles CUDA sources automatically) 47 | add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS}) 48 | 49 | # Define API_EXPORTS macro 50 | target_compile_definitions(${PROJECT_NAME} PRIVATE API_EXPORTS) 51 | 52 | # Specify include directories (modern CMake approach) 53 | target_include_directories(${PROJECT_NAME} PRIVATE 54 | src/ 55 | ${OpenCV_INCLUDE_DIRS} 56 | ${CUDA_INCLUDE_DIRS} 57 | ${TENSORRT_PATH}/include 58 | ) 59 | 60 | # Link TensorRT libraries 61 | # Specify full paths to TensorRT libraries to avoid relying on link_directories 62 | set(TENSORRT_LIBS 63 | "${TENSORRT_PATH}/lib/nvinfer.lib" 64 | "${TENSORRT_PATH}/lib/nvonnxparser.lib" 65 | "${TENSORRT_PATH}/lib/nvparsers.lib" 66 | "${TENSORRT_PATH}/lib/nvinfer_plugin.lib" 67 | ) 68 | 69 | # Link libraries to the target 70 | target_link_libraries(${PROJECT_NAME} PRIVATE 71 | ${OpenCV_LIBS} 72 | ${CUDA_LIBRARIES} 73 | ${TENSORRT_LIBS} 74 | ) 75 | 76 | # Enable separable compilation for CUDA (optional but recommended) 77 | set_target_properties(${PROJECT_NAME} PROPERTIES 78 | CUDA_SEPARABLE_COMPILATION ON 79 | ) 80 | 81 | # (Optional) Specify CUDA architectures based on your GPU hardware 82 | # set(CMAKE_CUDA_ARCHITECTURES 75) # Example for Turing architecture 83 | 84 | # (Optional) Set output directories for binaries 85 | # set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Custom License Agreement 2 | 3 | 1. License Grant You are hereby granted a non-exclusive, non-transferable license to use, reproduce, and distribute the code (hereinafter referred to as "the Software") under the following conditions: 4 | 5 | 2. Conditions of Use 6 | 7 | Non-Commercial Use: You may use the Software for personal, educational, or non-commercial purposes without any additional permissions. 8 | Commercial Use: Any commercial use of the Software, including but not limited to selling, licensing, or using it in a commercial product, requires prior written permission from the original developer. 9 | 3. Contact Requirement 10 | 11 | If you wish to use the Software for commercial purposes, you must contact the original developer at [https://www.linkedin.com/in/hamdi-boukamcha/] to obtain a commercial license. 12 | The terms of any commercial license will be mutually agreed upon and may involve a licensing fee. 13 | 4. Attribution 14 | 15 | Regardless of whether you are using the Software for commercial or non-commercial purposes, you must provide appropriate credit to the original developer in any distributions or products that use the Software. 16 | 5. Disclaimer of Warranty 17 | 18 | The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the original developer be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software. 19 | 6. Governing Law 20 | 21 | This License Agreement shall be governed by and construed in accordance with the laws of France. 22 | By using the Software, you agree to abide by the terms outlined in this License Agreement. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv11 C++ TensorRT 2 | ![Inference Time of YOLOv11 ](asset/Yolo_v11_cpp_tenosrrt.PNG) 3 | ![Inference Time of YOLOv11 ](asset/output.gif) 4 | 5 | 6 | GitHub 7 | 8 | 9 | 10 | License 11 | 12 | 13 | ## 📜 Citation 14 | 15 | The **YOLOv11 C++ TensorRT Project** is a high-performance object detection solution implemented in **C++** and optimized using **NVIDIA TensorRT**. This project leverages the YOLOv11 model to deliver fast and accurate object detection, utilizing TensorRT to maximize inference efficiency and performance. 16 | 17 | --- 18 | 19 | ## 📢 Updates 20 | 21 | ### Key Features: 22 | - **Model Conversion**: Convert ONNX models to TensorRT engine files to accelerate inference. 23 | - **Inference on Videos**: Efficiently perform object detection on video files. 24 | - **Inference on Images**: Execute object detection on individual images. 25 | - **High Efficiency**: Optimized for real-time object detection using NVIDIA GPUs. 26 | - **Preprocessing with CUDA**: CUDA-enabled preprocessing for faster input handling. 27 | 28 | ![Benchmark Inference Time of YOLOv11 Models](asset/Bench_YOLO_V11.JPG) 29 | 30 | --- 31 | ## 📂 Project Structure 32 | 33 | YOLOv11-TensorRT/ 34 | ├── CMakeLists.txt # Build configuration for the project 35 | ├── include/ # Header files 36 | ├── src/ 37 | │ ├── main.cpp # Main entry point for the application 38 | │ ├── yolov11.cpp # YOLOv11 implementation 39 | │ └── preprocess.cu # CUDA preprocessing code 40 | ├── assets/ # Images and benchmarks for README 41 | └── build/ # Compiled binaries 42 | 43 | ## 🛠️ Setup 44 | 45 | ### Prerequisites 46 | 47 | - **CMake** (version 3.18 or higher) 48 | - **TensorRT** (V8.6.1.6: For optimized inference with YOLOv11.) 49 | - **CUDA Toolkit** (V11.7: For GPU acceleration) 50 | - **OpenCV** (V4.10.0: For image and video processing) 51 | - **NVIDIA GPU** (with compute capability 7.5 or higher) 52 | 53 | ### Installation 54 | 55 | 1. Clone the repository: 56 | ```bash 57 | git clone https://github.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT.git 58 | cd YOLOv11-TensorRT 59 | 2. Update the TensorRT and OpenCV paths in CMakeLists.txt: 60 | ```bash 61 | set(TENSORRT_PATH "F:/Program Files/TensorRT-8.6.1.6") # Adjust this to your path 62 | 63 | 4. Build the project: 64 | ```bash 65 | mkdir build 66 | cd build 67 | cmake .. 68 | make -j$(nproc) 69 | ## 🚀 Usage 70 | 71 | ### Convert Yolov11 To ONNX Model 72 | from ultralytics import YOLO 73 | Load the YOLO model 74 | model = YOLO("yolo11s.pt") 75 | #Export the model to ONNX format 76 | export_path = model.export(format="onnx") 77 | 78 | ### Convert ONNX Model to TensorRT Engine 79 | 80 | To convert an ONNX model to a TensorRT engine file, use the following command: 81 | 82 | ./YOLOv11TRT convert path_to_your_model.onnx path_to_your_engine.engine. 83 | 84 | path_to_your_model.onnx: Path to the ONNX model file. 85 | 86 | path_to_your_engine.engine: Path where the TensorRT engine file will be saved. 87 | 88 | ### Run Inference on Video 89 | To run inference on a video, use the following command: 90 | 91 | ./YOLOv11TRT infer_video path_to_your_video.mp4 path_to_your_engine.engine 92 | 93 | path_to_your_video.mp4: Path to the input video file. 94 | 95 | path_to_your_engine.engine: Path to the TensorRT engine file. 96 | 97 | ### Run Inference on Video 98 | Run Inference on Image 99 | To run inference on an image, use the following command: 100 | 101 | ./YOLOv11TRT infer_image path_to_your_image.jpg path_to_your_engine.engine 102 | 103 | path_to_your_image.jpg: Path to the input image file. 104 | 105 | path_to_your_engine.engine: Path to the TensorRT engine file. 106 | 107 | ## ⚙️ Configuration 108 | 109 | ### CMake Configuration 110 | In the CMakeLists.txt, update the paths for TensorRT and OpenCV if they are installed in non-default locations: 111 | 112 | #### Set the path to TensorRT installation 113 | 114 | #Define the path to TensorRT installation 115 | set(TENSORRT_PATH "F:/Program Files/TensorRT-8.6.1.6") # Update this to the actual path for TensorRT 116 | 117 | Ensure that the path points to the directory where TensorRT is installed. 118 | 119 | ### Troubleshooting 120 | Cannot find nvinfer.lib: Ensure that TensorRT is correctly installed and that nvinfer.lib is in the specified path. Update CMakeLists.txt to include the correct path to TensorRT libraries. 121 | 122 | Linker Errors: Verify that all dependencies (OpenCV, CUDA, TensorRT) are correctly installed and that their paths are correctly set in CMakeLists.txt. 123 | 124 | Run-time Errors: Ensure that your system has the correct CUDA drivers and that TensorRT runtime libraries are accessible. Add TensorRT’s bin directory to your system PATH. 125 | 126 | ## 📞 Contact 127 | 128 | For advanced inquiries, feel free to contact me on LinkedIn: LinkedIn 129 | 130 | ## 📜 Citation 131 | 132 | If you use this code in your research, please cite the repository as follows: 133 | 134 | @misc{boukamcha2024yolov11, 135 | author = {Hamdi Boukamcha}, 136 | title = {Yolo-V11-cpp-TensorRT}, 137 | year = {2024}, 138 | publisher = {GitHub}, 139 | howpublished = {\url{https://github.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT/}}, 140 | } 141 | 142 | 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /asset/Bench_YOLO_V11.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT/988adf14f25d120fae4d971a0f2186cfe69b4e72/asset/Bench_YOLO_V11.JPG -------------------------------------------------------------------------------- /asset/Yolo_v11_cpp_tenosrrt.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT/988adf14f25d120fae4d971a0f2186cfe69b4e72/asset/Yolo_v11_cpp_tenosrrt.PNG -------------------------------------------------------------------------------- /asset/output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT/988adf14f25d120fae4d971a0f2186cfe69b4e72/asset/output.gif -------------------------------------------------------------------------------- /asset/output.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hamdiboukamcha/Yolo-V11-cpp-TensorRT/988adf14f25d120fae4d971a0f2186cfe69b4e72/asset/output.mp4 -------------------------------------------------------------------------------- /include/YOLOv11.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file YOLOv11.h 3 | * @brief Header file for the YOLOv11 object detection model using TensorRT and OpenCV. 4 | * 5 | * This class encapsulates the preprocessing, inference, and postprocessing steps required to 6 | * perform object detection using a YOLOv11 model with TensorRT. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include "NvInfer.h" 12 | #include 13 | 14 | using namespace nvinfer1; 15 | using namespace std; 16 | using namespace cv; 17 | 18 | /** 19 | * @struct Detection 20 | * @brief A structure representing a detected object. 21 | * 22 | * Contains the confidence score, class ID, and bounding box for a detected object. 23 | */ 24 | struct Detection 25 | { 26 | float conf; //!< Confidence score of the detection. 27 | int class_id; //!< Class ID of the detected object. 28 | Rect bbox; //!< Bounding box of the detected object. 29 | }; 30 | 31 | /** 32 | * @class YOLOv11 33 | * @brief A class for running YOLOv11 object detection using TensorRT and OpenCV. 34 | * 35 | * This class handles model initialization, inference, and postprocessing to detect objects 36 | * in images. 37 | */ 38 | class YOLOv11 39 | { 40 | public: 41 | 42 | /** 43 | * @brief Constructor to initialize the YOLOv11 object. 44 | * 45 | * Loads the model and initializes TensorRT objects. 46 | * 47 | * @param model_path Path to the model engine or ONNX file. 48 | * @param logger Reference to a TensorRT logger for error reporting. 49 | */ 50 | YOLOv11(string model_path, nvinfer1::ILogger& logger); 51 | 52 | /** 53 | * @brief Destructor to clean up resources. 54 | * 55 | * Frees the allocated memory and TensorRT resources. 56 | */ 57 | ~YOLOv11(); 58 | 59 | /** 60 | * @brief Preprocess the input image. 61 | * 62 | * Prepares the image for inference by resizing and normalizing it. 63 | * 64 | * @param image The input image to be preprocessed. 65 | */ 66 | void preprocess(Mat& image); 67 | 68 | /** 69 | * @brief Run inference on the preprocessed image. 70 | * 71 | * Executes the TensorRT engine for object detection. 72 | */ 73 | void infer(); 74 | 75 | /** 76 | * @brief Postprocess the output from the model. 77 | * 78 | * Filters and decodes the raw output from the TensorRT engine into detection results. 79 | * 80 | * @param output A vector to store the detected objects. 81 | */ 82 | void postprocess(vector& output); 83 | 84 | /** 85 | * @brief Draw the detected objects on the image. 86 | * 87 | * Overlays bounding boxes and class labels on the image for visualization. 88 | * 89 | * @param image The input image where the detections will be drawn. 90 | * @param output A vector of detections to be visualized. 91 | */ 92 | void draw(Mat& image, const vector& output); 93 | 94 | private: 95 | /** 96 | * @brief Initialize TensorRT components from the given engine file. 97 | * 98 | * @param engine_path Path to the serialized TensorRT engine file. 99 | * @param logger Reference to a TensorRT logger for error reporting. 100 | */ 101 | void init(std::string engine_path, nvinfer1::ILogger& logger); 102 | 103 | float* gpu_buffers[2]; //!< The vector of device buffers needed for engine execution. 104 | float* cpu_output_buffer; //!< Pointer to the output buffer on the host. 105 | 106 | cudaStream_t stream; //!< CUDA stream for asynchronous execution. 107 | IRuntime* runtime; //!< The TensorRT runtime used to deserialize the engine. 108 | ICudaEngine* engine; //!< The TensorRT engine used to run the network. 109 | IExecutionContext* context; //!< The context for executing inference using an ICudaEngine. 110 | 111 | // Model parameters 112 | int input_w; //!< Width of the input image. 113 | int input_h; //!< Height of the input image. 114 | int num_detections; //!< Number of detections output by the model. 115 | int detection_attribute_size; //!< Size of each detection attribute. 116 | int num_classes = 80; //!< Number of object classes that can be detected. 117 | const int MAX_IMAGE_SIZE = 4096 * 4096; //!< Maximum allowed input image size. 118 | float conf_threshold = 0.3f; //!< Confidence threshold for filtering detections. 119 | float nms_threshold = 0.4f; //!< Non-Maximum Suppression (NMS) threshold for filtering overlapping boxes. 120 | 121 | vector colors; //!< A vector of colors for drawing bounding boxes. 122 | 123 | /** 124 | * @brief Build the TensorRT engine from the ONNX model. 125 | * 126 | * @param onnxPath Path to the ONNX file. 127 | * @param logger Reference to a TensorRT logger for error reporting. 128 | */ 129 | void build(std::string onnxPath, nvinfer1::ILogger& logger); 130 | 131 | /** 132 | * @brief Save the TensorRT engine to a file. 133 | * 134 | * @param filename Path to save the serialized engine. 135 | * @return True if the engine was saved successfully, false otherwise. 136 | */ 137 | bool saveEngine(const std::string& filename); 138 | }; 139 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | const std::vector CLASS_NAMES = { 2 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", 3 | "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", 4 | "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", 5 | "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", 6 | "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", 7 | "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", 8 | "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", 9 | "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", 10 | "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", 11 | "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", 12 | "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", 13 | "teddy bear", "hair drier", "toothbrush" }; 14 | 15 | const std::vector> COLORS = { 16 | {0, 114, 189}, {217, 83, 25}, {237, 177, 32}, {126, 47, 142}, {119, 172, 48}, {77, 190, 238}, 17 | {162, 20, 47}, {76, 76, 76}, {153, 153, 153}, {255, 0, 0}, {255, 128, 0}, {191, 191, 0}, 18 | {0, 255, 0}, {0, 0, 255}, {170, 0, 255}, {85, 85, 0}, {85, 170, 0}, {85, 255, 0}, 19 | {170, 85, 0}, {170, 170, 0}, {170, 255, 0}, {255, 85, 0}, {255, 170, 0}, {255, 255, 0}, 20 | {0, 85, 128}, {0, 170, 128}, {0, 255, 128}, {85, 0, 128}, {85, 85, 128}, {85, 170, 128}, 21 | {85, 255, 128}, {170, 0, 128}, {170, 85, 128}, {170, 170, 128}, {170, 255, 128}, {255, 0, 128}, 22 | {255, 85, 128}, {255, 170, 128}, {255, 255, 128}, {0, 85, 255}, {0, 170, 255}, {0, 255, 255}, 23 | {85, 0, 255}, {85, 85, 255}, {85, 170, 255}, {85, 255, 255}, {170, 0, 255}, {170, 85, 255}, 24 | {170, 170, 255}, {170, 255, 255}, {255, 0, 255}, {255, 85, 255}, {255, 170, 255}, {85, 0, 0}, 25 | {128, 0, 0}, {170, 0, 0}, {212, 0, 0}, {255, 0, 0}, {0, 43, 0}, {0, 85, 0}, 26 | {0, 128, 0}, {0, 170, 0}, {0, 212, 0}, {0, 255, 0}, {0, 0, 43}, {0, 0, 85}, 27 | {0, 0, 128}, {0, 0, 170}, {0, 0, 212}, {0, 0, 255}, {0, 0, 0}, {36, 36, 36}, 28 | {73, 73, 73}, {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189}, 29 | {80, 183, 189}, {128, 128, 0} }; -------------------------------------------------------------------------------- /include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef TRTX_CUDA_UTILS_H_ 2 | #define TRTX_CUDA_UTILS_H_ 3 | 4 | #include 5 | 6 | #ifndef CUDA_CHECK 7 | #define CUDA_CHECK(callstr)\ 8 | {\ 9 | cudaError_t error_code = callstr;\ 10 | if (error_code != cudaSuccess) {\ 11 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\ 12 | assert(0);\ 13 | }\ 14 | } 15 | #endif // CUDA_CHECK 16 | 17 | #endif // TRTX_CUDA_UTILS_H_ 18 | -------------------------------------------------------------------------------- /include/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TENSORRT_LOGGING_H 18 | #define TENSORRT_LOGGING_H 19 | 20 | #include "NvInferRuntimeCommon.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "macros.h" 29 | 30 | using Severity = nvinfer1::ILogger::Severity; 31 | 32 | class LogStreamConsumerBuffer : public std::stringbuf 33 | { 34 | public: 35 | LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) 36 | : mOutput(stream) 37 | , mPrefix(prefix) 38 | , mShouldLog(shouldLog) 39 | { 40 | } 41 | 42 | LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) 43 | : mOutput(other.mOutput) 44 | { 45 | } 46 | 47 | ~LogStreamConsumerBuffer() 48 | { 49 | // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence 50 | // std::streambuf::pptr() gives a pointer to the current position of the output sequence 51 | // if the pointer to the beginning is not equal to the pointer to the current position, 52 | // call putOutput() to log the output to the stream 53 | if (pbase() != pptr()) 54 | { 55 | putOutput(); 56 | } 57 | } 58 | 59 | // synchronizes the stream buffer and returns 0 on success 60 | // synchronizing the stream buffer consists of inserting the buffer contents into the stream, 61 | // resetting the buffer and flushing the stream 62 | virtual int sync() 63 | { 64 | putOutput(); 65 | return 0; 66 | } 67 | 68 | void putOutput() 69 | { 70 | if (mShouldLog) 71 | { 72 | // prepend timestamp 73 | std::time_t timestamp = std::time(nullptr); 74 | tm* tm_local = std::localtime(×tamp); 75 | std::cout << "["; 76 | std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; 77 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; 78 | std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; 79 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; 80 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; 81 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; 82 | // std::stringbuf::str() gets the string contents of the buffer 83 | // insert the buffer contents pre-appended by the appropriate prefix into the stream 84 | mOutput << mPrefix << str(); 85 | // set the buffer to empty 86 | str(""); 87 | // flush the stream 88 | mOutput.flush(); 89 | } 90 | } 91 | 92 | void setShouldLog(bool shouldLog) 93 | { 94 | mShouldLog = shouldLog; 95 | } 96 | 97 | private: 98 | std::ostream& mOutput; 99 | std::string mPrefix; 100 | bool mShouldLog; 101 | }; 102 | 103 | //! 104 | //! \class LogStreamConsumerBase 105 | //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer 106 | //! 107 | class LogStreamConsumerBase 108 | { 109 | public: 110 | LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) 111 | : mBuffer(stream, prefix, shouldLog) 112 | { 113 | } 114 | 115 | protected: 116 | LogStreamConsumerBuffer mBuffer; 117 | }; 118 | 119 | //! 120 | //! \class LogStreamConsumer 121 | //! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. 122 | //! Order of base classes is LogStreamConsumerBase and then std::ostream. 123 | //! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field 124 | //! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. 125 | //! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. 126 | //! Please do not change the order of the parent classes. 127 | //! 128 | class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream 129 | { 130 | public: 131 | //! \brief Creates a LogStreamConsumer which logs messages with level severity. 132 | //! Reportable severity determines if the messages are severe enough to be logged. 133 | LogStreamConsumer(Severity reportableSeverity, Severity severity) 134 | : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) 135 | , std::ostream(&mBuffer) // links the stream buffer with the stream 136 | , mShouldLog(severity <= reportableSeverity) 137 | , mSeverity(severity) 138 | { 139 | } 140 | 141 | LogStreamConsumer(LogStreamConsumer&& other) 142 | : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) 143 | , std::ostream(&mBuffer) // links the stream buffer with the stream 144 | , mShouldLog(other.mShouldLog) 145 | , mSeverity(other.mSeverity) 146 | { 147 | } 148 | 149 | void setReportableSeverity(Severity reportableSeverity) 150 | { 151 | mShouldLog = mSeverity <= reportableSeverity; 152 | mBuffer.setShouldLog(mShouldLog); 153 | } 154 | 155 | private: 156 | static std::ostream& severityOstream(Severity severity) 157 | { 158 | return severity >= Severity::kINFO ? std::cout : std::cerr; 159 | } 160 | 161 | static std::string severityPrefix(Severity severity) 162 | { 163 | switch (severity) 164 | { 165 | case Severity::kINTERNAL_ERROR: return "[F] "; 166 | case Severity::kERROR: return "[E] "; 167 | case Severity::kWARNING: return "[W] "; 168 | case Severity::kINFO: return "[I] "; 169 | case Severity::kVERBOSE: return "[V] "; 170 | default: assert(0); return ""; 171 | } 172 | } 173 | 174 | bool mShouldLog; 175 | Severity mSeverity; 176 | }; 177 | 178 | //! \class Logger 179 | //! 180 | //! \brief Class which manages logging of TensorRT tools and samples 181 | //! 182 | //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, 183 | //! and supports logging two types of messages: 184 | //! 185 | //! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) 186 | //! - Test pass/fail messages 187 | //! 188 | //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is 189 | //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. 190 | //! 191 | //! In the future, this class could be extended to support dumping test results to a file in some standard format 192 | //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). 193 | //! 194 | //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger 195 | //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT 196 | //! library and messages coming from the sample. 197 | //! 198 | //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the 199 | //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger 200 | //! object. 201 | 202 | class Logger : public nvinfer1::ILogger 203 | { 204 | public: 205 | Logger(Severity severity = Severity::kWARNING) 206 | : mReportableSeverity(severity) 207 | { 208 | } 209 | 210 | //! 211 | //! \enum TestResult 212 | //! \brief Represents the state of a given test 213 | //! 214 | enum class TestResult 215 | { 216 | kRUNNING, //!< The test is running 217 | kPASSED, //!< The test passed 218 | kFAILED, //!< The test failed 219 | kWAIVED //!< The test was waived 220 | }; 221 | 222 | //! 223 | //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger 224 | //! \return The nvinfer1::ILogger associated with this Logger 225 | //! 226 | //! TODO Once all samples are updated to use this method to register the logger with TensorRT, 227 | //! we can eliminate the inheritance of Logger from ILogger 228 | //! 229 | nvinfer1::ILogger& getTRTLogger() 230 | { 231 | return *this; 232 | } 233 | 234 | //! 235 | //! \brief Implementation of the nvinfer1::ILogger::log() virtual method 236 | //! 237 | //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the 238 | //! inheritance from nvinfer1::ILogger 239 | //! 240 | void log(Severity severity, const char* msg) TRT_NOEXCEPT override 241 | { 242 | LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; 243 | } 244 | 245 | //! 246 | //! \brief Method for controlling the verbosity of logging output 247 | //! 248 | //! \param severity The logger will only emit messages that have severity of this level or higher. 249 | //! 250 | void setReportableSeverity(Severity severity) 251 | { 252 | mReportableSeverity = severity; 253 | } 254 | 255 | //! 256 | //! \brief Opaque handle that holds logging information for a particular test 257 | //! 258 | //! This object is an opaque handle to information used by the Logger to print test results. 259 | //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used 260 | //! with Logger::reportTest{Start,End}(). 261 | //! 262 | class TestAtom 263 | { 264 | public: 265 | TestAtom(TestAtom&&) = default; 266 | 267 | private: 268 | friend class Logger; 269 | 270 | TestAtom(bool started, const std::string& name, const std::string& cmdline) 271 | : mStarted(started) 272 | , mName(name) 273 | , mCmdline(cmdline) 274 | { 275 | } 276 | 277 | bool mStarted; 278 | std::string mName; 279 | std::string mCmdline; 280 | }; 281 | 282 | //! 283 | //! \brief Define a test for logging 284 | //! 285 | //! \param[in] name The name of the test. This should be a string starting with 286 | //! "TensorRT" and containing dot-separated strings containing 287 | //! the characters [A-Za-z0-9_]. 288 | //! For example, "TensorRT.sample_googlenet" 289 | //! \param[in] cmdline The command line used to reproduce the test 290 | // 291 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 292 | //! 293 | static TestAtom defineTest(const std::string& name, const std::string& cmdline) 294 | { 295 | return TestAtom(false, name, cmdline); 296 | } 297 | 298 | //! 299 | //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments 300 | //! as input 301 | //! 302 | //! \param[in] name The name of the test 303 | //! \param[in] argc The number of command-line arguments 304 | //! \param[in] argv The array of command-line arguments (given as C strings) 305 | //! 306 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 307 | static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) 308 | { 309 | auto cmdline = genCmdlineString(argc, argv); 310 | return defineTest(name, cmdline); 311 | } 312 | 313 | //! 314 | //! \brief Report that a test has started. 315 | //! 316 | //! \pre reportTestStart() has not been called yet for the given testAtom 317 | //! 318 | //! \param[in] testAtom The handle to the test that has started 319 | //! 320 | static void reportTestStart(TestAtom& testAtom) 321 | { 322 | reportTestResult(testAtom, TestResult::kRUNNING); 323 | assert(!testAtom.mStarted); 324 | testAtom.mStarted = true; 325 | } 326 | 327 | //! 328 | //! \brief Report that a test has ended. 329 | //! 330 | //! \pre reportTestStart() has been called for the given testAtom 331 | //! 332 | //! \param[in] testAtom The handle to the test that has ended 333 | //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, 334 | //! TestResult::kFAILED, TestResult::kWAIVED 335 | //! 336 | static void reportTestEnd(const TestAtom& testAtom, TestResult result) 337 | { 338 | assert(result != TestResult::kRUNNING); 339 | assert(testAtom.mStarted); 340 | reportTestResult(testAtom, result); 341 | } 342 | 343 | static int reportPass(const TestAtom& testAtom) 344 | { 345 | reportTestEnd(testAtom, TestResult::kPASSED); 346 | return EXIT_SUCCESS; 347 | } 348 | 349 | static int reportFail(const TestAtom& testAtom) 350 | { 351 | reportTestEnd(testAtom, TestResult::kFAILED); 352 | return EXIT_FAILURE; 353 | } 354 | 355 | static int reportWaive(const TestAtom& testAtom) 356 | { 357 | reportTestEnd(testAtom, TestResult::kWAIVED); 358 | return EXIT_SUCCESS; 359 | } 360 | 361 | static int reportTest(const TestAtom& testAtom, bool pass) 362 | { 363 | return pass ? reportPass(testAtom) : reportFail(testAtom); 364 | } 365 | 366 | Severity getReportableSeverity() const 367 | { 368 | return mReportableSeverity; 369 | } 370 | 371 | private: 372 | //! 373 | //! \brief returns an appropriate string for prefixing a log message with the given severity 374 | //! 375 | static const char* severityPrefix(Severity severity) 376 | { 377 | switch (severity) 378 | { 379 | case Severity::kINTERNAL_ERROR: return "[F] "; 380 | case Severity::kERROR: return "[E] "; 381 | case Severity::kWARNING: return "[W] "; 382 | case Severity::kINFO: return "[I] "; 383 | case Severity::kVERBOSE: return "[V] "; 384 | default: assert(0); return ""; 385 | } 386 | } 387 | 388 | //! 389 | //! \brief returns an appropriate string for prefixing a test result message with the given result 390 | //! 391 | static const char* testResultString(TestResult result) 392 | { 393 | switch (result) 394 | { 395 | case TestResult::kRUNNING: return "RUNNING"; 396 | case TestResult::kPASSED: return "PASSED"; 397 | case TestResult::kFAILED: return "FAILED"; 398 | case TestResult::kWAIVED: return "WAIVED"; 399 | default: assert(0); return ""; 400 | } 401 | } 402 | 403 | //! 404 | //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity 405 | //! 406 | static std::ostream& severityOstream(Severity severity) 407 | { 408 | return severity >= Severity::kINFO ? std::cout : std::cerr; 409 | } 410 | 411 | //! 412 | //! \brief method that implements logging test results 413 | //! 414 | static void reportTestResult(const TestAtom& testAtom, TestResult result) 415 | { 416 | severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " 417 | << testAtom.mCmdline << std::endl; 418 | } 419 | 420 | //! 421 | //! \brief generate a command line string from the given (argc, argv) values 422 | //! 423 | static std::string genCmdlineString(int argc, char const* const* argv) 424 | { 425 | std::stringstream ss; 426 | for (int i = 0; i < argc; i++) 427 | { 428 | if (i > 0) 429 | ss << " "; 430 | ss << argv[i]; 431 | } 432 | return ss.str(); 433 | } 434 | 435 | Severity mReportableSeverity; 436 | }; 437 | 438 | namespace 439 | { 440 | 441 | //! 442 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE 443 | //! 444 | //! Example usage: 445 | //! 446 | //! LOG_VERBOSE(logger) << "hello world" << std::endl; 447 | //! 448 | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) 449 | { 450 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); 451 | } 452 | 453 | //! 454 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO 455 | //! 456 | //! Example usage: 457 | //! 458 | //! LOG_INFO(logger) << "hello world" << std::endl; 459 | //! 460 | inline LogStreamConsumer LOG_INFO(const Logger& logger) 461 | { 462 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); 463 | } 464 | 465 | //! 466 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING 467 | //! 468 | //! Example usage: 469 | //! 470 | //! LOG_WARN(logger) << "hello world" << std::endl; 471 | //! 472 | inline LogStreamConsumer LOG_WARN(const Logger& logger) 473 | { 474 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); 475 | } 476 | 477 | //! 478 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR 479 | //! 480 | //! Example usage: 481 | //! 482 | //! LOG_ERROR(logger) << "hello world" << std::endl; 483 | //! 484 | inline LogStreamConsumer LOG_ERROR(const Logger& logger) 485 | { 486 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); 487 | } 488 | 489 | //! 490 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR 491 | // ("fatal" severity) 492 | //! 493 | //! Example usage: 494 | //! 495 | //! LOG_FATAL(logger) << "hello world" << std::endl; 496 | //! 497 | inline LogStreamConsumer LOG_FATAL(const Logger& logger) 498 | { 499 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); 500 | } 501 | 502 | } // anonymous namespace 503 | 504 | #endif // TENSORRT_LOGGING_H -------------------------------------------------------------------------------- /include/macros.h: -------------------------------------------------------------------------------- 1 | #ifndef __MACROS_H 2 | #define __MACROS_H 3 | 4 | #ifdef API_EXPORTS 5 | #if defined(_MSC_VER) 6 | #define API __declspec(dllexport) 7 | #else 8 | #define API __attribute__((visibility("default"))) 9 | #endif 10 | #else 11 | 12 | #if defined(_MSC_VER) 13 | #define API __declspec(dllimport) 14 | #else 15 | #define API 16 | #endif 17 | #endif // API_EXPORTS 18 | 19 | #if NV_TENSORRT_MAJOR >= 8 20 | #define TRT_NOEXCEPT noexcept 21 | #define TRT_CONST_ENQUEUE const 22 | #else 23 | #define TRT_NOEXCEPT 24 | #define TRT_CONST_ENQUEUE 25 | #endif 26 | 27 | #endif // __MACROS_H -------------------------------------------------------------------------------- /include/preprocess.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cuda_preprocess.h 3 | * @brief Header file for CUDA-based image preprocessing functions. 4 | * 5 | * This file contains functions for initializing, destroying, and running image preprocessing 6 | * using CUDA for accelerating operations like resizing and data format conversion. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | /** 16 | * @brief Initialize CUDA resources for image preprocessing. 17 | * 18 | * Allocates resources and sets up the necessary environment for performing image preprocessing 19 | * on the GPU. This function should be called once before using any preprocessing functions. 20 | * 21 | * @param max_image_size The maximum image size (in pixels) that will be processed. 22 | */ 23 | void cuda_preprocess_init(int max_image_size); 24 | 25 | /** 26 | * @brief Clean up and release CUDA resources. 27 | * 28 | * Frees any memory and resources allocated during initialization. This function should be 29 | * called when the preprocessing operations are no longer needed. 30 | */ 31 | void cuda_preprocess_destroy(); 32 | 33 | /** 34 | * @brief Preprocess an image using CUDA. 35 | * 36 | * This function resizes and converts the input image data (from uint8 to float) using CUDA 37 | * for faster processing. The result is stored in a destination buffer, ready for inference. 38 | * 39 | * @param src Pointer to the source image data in uint8 format. 40 | * @param src_width The width of the source image. 41 | * @param src_height The height of the source image. 42 | * @param dst Pointer to the destination buffer to store the preprocessed image in float format. 43 | * @param dst_width The desired width of the output image. 44 | * @param dst_height The desired height of the output image. 45 | * @param stream The CUDA stream to execute the preprocessing operation asynchronously. 46 | */ 47 | void cuda_preprocess(uint8_t* src, int src_width, int src_height, 48 | float* dst, int dst_width, int dst_height, 49 | cudaStream_t stream); 50 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #ifdef _WIN32 2 | #include 3 | #else 4 | #include 5 | #include 6 | #endif 7 | 8 | #include 9 | #include 10 | #include "yolov11.h" 11 | 12 | 13 | /** 14 | * @brief Setting up Tensorrt logger 15 | */ 16 | class Logger : public nvinfer1::ILogger { 17 | void log(Severity severity, const char* msg) noexcept override { 18 | // Only output logs with severity greater than warning 19 | if (severity <= Severity::kWARNING) 20 | std::cout << msg << std::endl; 21 | } 22 | }logger; 23 | 24 | 25 | int main(int argc, char* argv[]) { 26 | 27 | // Define color codes for terminal output 28 | const std::string RED_COLOR = "\033[31m"; 29 | const std::string GREEN_COLOR = "\033[32m"; 30 | const std::string YELLOW_COLOR = "\033[33m"; 31 | const std::string RESET_COLOR = "\033[0m"; 32 | 33 | // Check for valid number of arguments 34 | if (argc < 4 || argc > 5) { 35 | std::cerr << RED_COLOR << "Usage: " << RESET_COLOR << argv[0] 36 | << " [onnx_path]" << std::endl; 37 | std::cerr << YELLOW_COLOR << " - Mode of operation: 'convert', 'infer_video', or 'infer_image'" << RESET_COLOR << std::endl; 38 | std::cerr << YELLOW_COLOR << " - Path to the input video/image or ONNX model" << RESET_COLOR << std::endl; 39 | std::cerr << YELLOW_COLOR << " - Path to the TensorRT engine file" << RESET_COLOR << std::endl; 40 | std::cerr << YELLOW_COLOR << " [onnx_path] - Path to the ONNX model (only for 'convert' mode)" << RESET_COLOR << std::endl; 41 | return 1; 42 | } 43 | 44 | // Parse command-line arguments 45 | std::string mode = argv[1]; 46 | std::string inputPath = argv[2]; 47 | std::string enginePath = argv[3]; 48 | std::string onnxPath; 49 | 50 | // Validate mode and arguments 51 | if (mode == "convert") { 52 | if (argc != 5) { // 'convert' requires onnx_path 53 | std::cerr << RED_COLOR << "Usage for conversion: " << RESET_COLOR << argv[0] 54 | << " convert " << std::endl; 55 | return 1; 56 | } 57 | onnxPath = inputPath; // In 'convert' mode, inputPath is actually onnx_path 58 | } 59 | else if (mode == "infer_video" || mode == "infer_image") { 60 | if (argc != 4) { 61 | std::cerr << RED_COLOR << "Usage for " << mode << ": " << RESET_COLOR << argv[0] 62 | << " " << mode << " " << std::endl; 63 | return 1; 64 | } 65 | } 66 | else { 67 | std::cerr << RED_COLOR << "Invalid mode. Use 'convert', 'infer_video', or 'infer_image'." << RESET_COLOR << std::endl; 68 | return 1; 69 | } 70 | 71 | // Initialize the Logger 72 | Logger logger; 73 | 74 | // Handle 'convert' mode 75 | if (mode == "convert") { 76 | try { 77 | // Initialize YOLOv11 with the ONNX model path 78 | YOLOv11 yolov11(onnxPath, logger); 79 | std::cout << GREEN_COLOR << "Model conversion successful. Engine saved." << RESET_COLOR << std::endl; 80 | } 81 | catch (const std::exception& e) { 82 | std::cerr << RED_COLOR << "Error during model conversion: " << e.what() << RESET_COLOR << std::endl; 83 | return 1; 84 | } 85 | } 86 | // Handle inference modes 87 | else if (mode == "infer_video" || mode == "infer_image") { 88 | try { 89 | // Initialize YOLOv11 with the TensorRT engine path 90 | YOLOv11 yolov11(enginePath, logger); 91 | 92 | if (mode == "infer_video") { 93 | // Open the video file 94 | cv::VideoCapture cap(inputPath); 95 | if (!cap.isOpened()) { 96 | std::cerr << RED_COLOR << "Failed to open video file: " << inputPath << RESET_COLOR << std::endl; 97 | return 1; 98 | } 99 | 100 | // Prepare video writer to save the output (optional) 101 | std::string outputVideoPath = "output_video.avi"; 102 | int frame_width = static_cast(cap.get(cv::CAP_PROP_FRAME_WIDTH)); 103 | int frame_height = static_cast(cap.get(cv::CAP_PROP_FRAME_HEIGHT)); 104 | cv::VideoWriter video(outputVideoPath, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), 30, 105 | cv::Size(frame_width, frame_height)); 106 | 107 | cv::Mat frame; 108 | while (cap.read(frame)) { 109 | // Preprocess the frame 110 | yolov11.preprocess(frame); 111 | 112 | // Perform inference 113 | yolov11.infer(); 114 | 115 | // Postprocess to get detections 116 | std::vector detections; 117 | yolov11.postprocess(detections); 118 | 119 | // Draw detections on the frame 120 | yolov11.draw(frame, detections); 121 | 122 | // Display the frame (optional) 123 | cv::imshow("Inference", frame); 124 | if (cv::waitKey(1) == 27) { // Exit on 'ESC' key 125 | break; 126 | } 127 | 128 | // Write the frame to the output video 129 | video.write(frame); 130 | } 131 | 132 | cap.release(); 133 | video.release(); 134 | cv::destroyAllWindows(); 135 | std::cout << GREEN_COLOR << "Video inference completed. Output saved to " 136 | << outputVideoPath << RESET_COLOR << std::endl; 137 | } 138 | else if (mode == "infer_image") { 139 | // Read the image 140 | cv::Mat image = cv::imread(inputPath); 141 | if (image.empty()) { 142 | std::cerr << RED_COLOR << "Failed to read image: " << inputPath << RESET_COLOR << std::endl; 143 | return 1; 144 | } 145 | 146 | // Preprocess the image 147 | yolov11.preprocess(image); 148 | 149 | // Perform inference 150 | yolov11.infer(); 151 | 152 | // Postprocess to get detections 153 | std::vector detections; 154 | yolov11.postprocess(detections); 155 | 156 | // Draw detections on the image 157 | yolov11.draw(image, detections); 158 | 159 | // Display the image (optional) 160 | cv::imshow("Inference", image); 161 | cv::waitKey(0); // Wait indefinitely until a key is pressed 162 | 163 | // Save the output image 164 | std::string outputImagePath = "output_image.jpg"; 165 | cv::imwrite(outputImagePath, image); 166 | std::cout << GREEN_COLOR << "Image inference completed. Output saved to " 167 | << outputImagePath << RESET_COLOR << std::endl; 168 | } 169 | } 170 | catch (const std::exception& e) { 171 | std::cerr << RED_COLOR << "Error during inference: " << e.what() << RESET_COLOR << std::endl; 172 | return 1; 173 | } 174 | } 175 | 176 | return 0; 177 | } -------------------------------------------------------------------------------- /src/YOLOv11.cpp: -------------------------------------------------------------------------------- 1 | #include "YOLOv11.h" // Header file for YOLOv11 class 2 | #include "logging.h" // Logging utilities 3 | #include "cuda_utils.h" // CUDA utility functions 4 | #include "macros.h" // Common macros 5 | #include "preprocess.h" // Preprocessing functions 6 | #include // NVIDIA ONNX parser for TensorRT 7 | #include "common.h" // Common definitions and utilities 8 | #include // File stream operations 9 | #include // Input/output stream operations 10 | 11 | // Initialize a static logger instance 12 | static Logger logger; 13 | 14 | // Define whether to use FP16 precision 15 | #define isFP16 true 16 | 17 | // Define whether to perform model warmup 18 | #define warmup true 19 | 20 | // Constructor for the YOLOv11 class 21 | YOLOv11::YOLOv11(string model_path, nvinfer1::ILogger& logger) 22 | { 23 | // Check if the model path does not contain ".onnx" 24 | if (model_path.find(".onnx") == std::string::npos) 25 | { 26 | // Initialize the engine from a serialized engine file 27 | init(model_path, logger); 28 | } 29 | else 30 | { 31 | // Build the engine from an ONNX model 32 | build(model_path, logger); 33 | // Save the built engine to a file 34 | saveEngine(model_path); 35 | } 36 | 37 | // Handle input dimensions based on TensorRT version 38 | #if NV_TENSORRT_MAJOR < 10 39 | // For TensorRT versions less than 10, get binding dimensions directly 40 | auto input_dims = engine->getBindingDimensions(0); 41 | input_h = input_dims.d[2]; 42 | input_w = input_dims.d[3]; 43 | #else 44 | // For TensorRT versions 10 and above, use getTensorShape 45 | auto input_dims = engine->getTensorShape(engine->getIOTensorName(0)); 46 | input_h = input_dims.d[2]; 47 | input_w = input_dims.d[3]; 48 | #endif 49 | } 50 | 51 | // Initialize the engine from a serialized engine file 52 | void YOLOv11::init(std::string engine_path, nvinfer1::ILogger& logger) 53 | { 54 | // Open the engine file in binary mode 55 | ifstream engineStream(engine_path, ios::binary); 56 | // Move to the end to determine file size 57 | engineStream.seekg(0, ios::end); 58 | const size_t modelSize = engineStream.tellg(); 59 | // Move back to the beginning of the file 60 | engineStream.seekg(0, ios::beg); 61 | // Allocate memory to read the engine data 62 | unique_ptr engineData(new char[modelSize]); 63 | // Read the engine data into memory 64 | engineStream.read(engineData.get(), modelSize); 65 | engineStream.close(); 66 | 67 | // Create a TensorRT runtime instance 68 | runtime = createInferRuntime(logger); 69 | // Deserialize the CUDA engine from the engine data 70 | engine = runtime->deserializeCudaEngine(engineData.get(), modelSize); 71 | // Create an execution context for the engine 72 | context = engine->createExecutionContext(); 73 | 74 | // Retrieve input dimensions from the engine 75 | input_h = engine->getBindingDimensions(0).d[2]; 76 | input_w = engine->getBindingDimensions(0).d[3]; 77 | // Retrieve detection attributes and number of detections 78 | detection_attribute_size = engine->getBindingDimensions(1).d[1]; 79 | num_detections = engine->getBindingDimensions(1).d[2]; 80 | // Calculate the number of classes based on detection attributes 81 | num_classes = detection_attribute_size - 4; 82 | 83 | // Allocate CPU memory for output buffer 84 | cpu_output_buffer = new float[detection_attribute_size * num_detections]; 85 | // Allocate GPU memory for input buffer (assuming 3 channels: RGB) 86 | CUDA_CHECK(cudaMalloc(&gpu_buffers[0], 3 * input_w * input_h * sizeof(float))); 87 | // Allocate GPU memory for output buffer 88 | CUDA_CHECK(cudaMalloc(&gpu_buffers[1], detection_attribute_size * num_detections * sizeof(float))); 89 | 90 | // Initialize CUDA preprocessing with maximum image size 91 | cuda_preprocess_init(MAX_IMAGE_SIZE); 92 | 93 | // Create a CUDA stream for asynchronous operations 94 | CUDA_CHECK(cudaStreamCreate(&stream)); 95 | 96 | // Perform model warmup if enabled 97 | if (warmup) { 98 | for (int i = 0; i < 10; i++) { 99 | this->infer(); // Run inference to warm up the model 100 | } 101 | printf("model warmup 10 times\n"); 102 | } 103 | } 104 | 105 | // Destructor for the YOLOv11 class 106 | YOLOv11::~YOLOv11() 107 | { 108 | // Synchronize and destroy the CUDA stream 109 | CUDA_CHECK(cudaStreamSynchronize(stream)); 110 | CUDA_CHECK(cudaStreamDestroy(stream)); 111 | // Free allocated GPU buffers 112 | for (int i = 0; i < 2; i++) 113 | CUDA_CHECK(cudaFree(gpu_buffers[i])); 114 | // Free CPU output buffer 115 | delete[] cpu_output_buffer; 116 | 117 | // Destroy CUDA preprocessing resources 118 | cuda_preprocess_destroy(); 119 | // Delete TensorRT context, engine, and runtime 120 | delete context; 121 | delete engine; 122 | delete runtime; 123 | } 124 | 125 | // Preprocess the input image and transfer it to the GPU buffer 126 | void YOLOv11::preprocess(Mat& image) { 127 | // Perform CUDA-based preprocessing 128 | cuda_preprocess(image.ptr(), image.cols, image.rows, gpu_buffers[0], input_w, input_h, stream); 129 | // Synchronize the CUDA stream to ensure preprocessing is complete 130 | CUDA_CHECK(cudaStreamSynchronize(stream)); 131 | } 132 | 133 | // Perform inference using the TensorRT execution context 134 | void YOLOv11::infer() 135 | { 136 | #if NV_TENSORRT_MAJOR < 10 137 | // For TensorRT versions less than 10, use enqueueV2 with GPU buffers 138 | context->enqueueV2((void**)gpu_buffers, stream, nullptr); 139 | #else 140 | // For TensorRT versions 10 and above, use enqueueV3 with the CUDA stream 141 | this->context->enqueueV3(this->stream); 142 | #endif 143 | } 144 | 145 | // Postprocess the inference output to extract detections 146 | void YOLOv11::postprocess(vector& output) 147 | { 148 | // Asynchronously copy output from GPU to CPU 149 | CUDA_CHECK(cudaMemcpyAsync(cpu_output_buffer, gpu_buffers[1], num_detections * detection_attribute_size * sizeof(float), cudaMemcpyDeviceToHost, stream)); 150 | // Synchronize the CUDA stream to ensure copy is complete 151 | CUDA_CHECK(cudaStreamSynchronize(stream)); 152 | 153 | vector boxes; // Bounding boxes 154 | vector class_ids; // Class IDs 155 | vector confidences; // Confidence scores 156 | 157 | // Create a matrix view of the detection output 158 | const Mat det_output(detection_attribute_size, num_detections, CV_32F, cpu_output_buffer); 159 | 160 | // Iterate over each detection 161 | for (int i = 0; i < det_output.cols; ++i) { 162 | // Extract class scores for the current detection 163 | const Mat classes_scores = det_output.col(i).rowRange(4, 4 + num_classes); 164 | Point class_id_point; 165 | double score; 166 | // Find the class with the maximum score 167 | minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point); 168 | 169 | // Check if the confidence score exceeds the threshold 170 | if (score > conf_threshold) { 171 | // Extract bounding box coordinates 172 | const float cx = det_output.at(0, i); 173 | const float cy = det_output.at(1, i); 174 | const float ow = det_output.at(2, i); 175 | const float oh = det_output.at(3, i); 176 | Rect box; 177 | // Calculate top-left corner of the bounding box 178 | box.x = static_cast((cx - 0.5 * ow)); 179 | box.y = static_cast((cy - 0.5 * oh)); 180 | // Set width and height of the bounding box 181 | box.width = static_cast(ow); 182 | box.height = static_cast(oh); 183 | 184 | // Store the bounding box, class ID, and confidence 185 | boxes.push_back(box); 186 | class_ids.push_back(class_id_point.y); 187 | confidences.push_back(score); 188 | } 189 | } 190 | 191 | vector nms_result; // Indices after Non-Maximum Suppression (NMS) 192 | // Apply NMS to remove overlapping boxes 193 | dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, nms_result); 194 | 195 | // Iterate over NMS results and populate the output detections 196 | for (int i = 0; i < nms_result.size(); i++) 197 | { 198 | Detection result; 199 | int idx = nms_result[i]; 200 | result.class_id = class_ids[idx]; 201 | result.conf = confidences[idx]; 202 | result.bbox = boxes[idx]; 203 | output.push_back(result); 204 | } 205 | } 206 | 207 | // Build the TensorRT engine from an ONNX model 208 | void YOLOv11::build(std::string onnxPath, nvinfer1::ILogger& logger) 209 | { 210 | // Create a TensorRT builder 211 | auto builder = createInferBuilder(logger); 212 | // Define network flags for explicit batch dimensions 213 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 214 | // Create a network definition with explicit batch 215 | INetworkDefinition* network = builder->createNetworkV2(explicitBatch); 216 | // Create builder configuration 217 | IBuilderConfig* config = builder->createBuilderConfig(); 218 | // Enable FP16 precision if specified 219 | if (isFP16) 220 | { 221 | config->setFlag(BuilderFlag::kFP16); 222 | } 223 | // Create an ONNX parser 224 | nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger); 225 | // Parse the ONNX model file 226 | bool parsed = parser->parseFromFile(onnxPath.c_str(), static_cast(nvinfer1::ILogger::Severity::kINFO)); 227 | // Build the serialized network plan 228 | IHostMemory* plan{ builder->buildSerializedNetwork(*network, *config) }; 229 | 230 | // Create a TensorRT runtime 231 | runtime = createInferRuntime(logger); 232 | 233 | // Deserialize the CUDA engine from the serialized plan 234 | engine = runtime->deserializeCudaEngine(plan->data(), plan->size()); 235 | 236 | // Create an execution context for the engine 237 | context = engine->createExecutionContext(); 238 | 239 | // Clean up allocated resources 240 | delete network; 241 | delete config; 242 | delete parser; 243 | delete plan; 244 | } 245 | 246 | // Save the serialized TensorRT engine to a file 247 | bool YOLOv11::saveEngine(const std::string& onnxpath) 248 | { 249 | // Generate the engine file path by replacing the extension with ".engine" 250 | std::string engine_path; 251 | size_t dotIndex = onnxpath.find_last_of("."); 252 | if (dotIndex != std::string::npos) { 253 | engine_path = onnxpath.substr(0, dotIndex) + ".engine"; 254 | } 255 | else 256 | { 257 | return false; // Return false if no extension is found 258 | } 259 | 260 | // Check if the engine is valid 261 | if (engine) 262 | { 263 | // Serialize the engine 264 | nvinfer1::IHostMemory* data = engine->serialize(); 265 | std::ofstream file; 266 | // Open the engine file in binary write mode 267 | file.open(engine_path, std::ios::binary | std::ios::out); 268 | if (!file.is_open()) 269 | { 270 | std::cout << "Create engine file " << engine_path << " failed" << std::endl; 271 | return false; 272 | } 273 | // Write the serialized engine data to the file 274 | file.write((const char*)data->data(), data->size()); 275 | file.close(); 276 | 277 | // Free the serialized data memory 278 | delete data; 279 | } 280 | return true; 281 | } 282 | 283 | // Draw bounding boxes and labels on the image based on detections 284 | void YOLOv11::draw(Mat& image, const vector& output) 285 | { 286 | // Calculate the scaling ratios between input and original image dimensions 287 | const float ratio_h = input_h / (float)image.rows; 288 | const float ratio_w = input_w / (float)image.cols; 289 | 290 | // Iterate over each detection 291 | for (int i = 0; i < output.size(); i++) 292 | { 293 | auto detection = output[i]; 294 | auto box = detection.bbox; 295 | auto class_id = detection.class_id; 296 | auto conf = detection.conf; 297 | // Assign a color based on the class ID 298 | cv::Scalar color = cv::Scalar(COLORS[class_id][0], COLORS[class_id][1], COLORS[class_id][2]); 299 | 300 | // Adjust bounding box coordinates based on aspect ratio 301 | if (ratio_h > ratio_w) 302 | { 303 | box.x = box.x / ratio_w; 304 | box.y = (box.y - (input_h - ratio_w * image.rows) / 2) / ratio_w; 305 | box.width = box.width / ratio_w; 306 | box.height = box.height / ratio_w; 307 | } 308 | else 309 | { 310 | box.x = (box.x - (input_w - ratio_h * image.cols) / 2) / ratio_h; 311 | box.y = box.y / ratio_h; 312 | box.width = box.width / ratio_h; 313 | box.height = box.height / ratio_h; 314 | } 315 | 316 | // Draw the bounding box on the image 317 | rectangle(image, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), color, 3); 318 | 319 | // Prepare the label text with class name and confidence 320 | string class_string = CLASS_NAMES[class_id] + ' ' + to_string(conf).substr(0, 4); 321 | // Calculate the size of the text for background rectangle 322 | Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0); 323 | // Define the background rectangle for the text 324 | Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20); 325 | // Draw the background rectangle 326 | rectangle(image, text_rect, color, FILLED); 327 | // Put the text label on the image 328 | putText(image, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0); 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /src/preprocess.cu: -------------------------------------------------------------------------------- 1 | #include "preprocess.h" 2 | #include "cuda_utils.h" 3 | #include "device_launch_parameters.h" 4 | 5 | // Host and device pointers for image buffers 6 | static uint8_t* img_buffer_host = nullptr; // Pinned memory on the host for faster transfers 7 | static uint8_t* img_buffer_device = nullptr; // Memory on the device (GPU) 8 | 9 | // Structure to represent a 2x3 affine transformation matrix 10 | struct AffineMatrix { 11 | float value[6]; // [m00, m01, m02, m10, m11, m12] 12 | }; 13 | 14 | // CUDA kernel to perform affine warp on the image 15 | __global__ void warpaffine_kernel( 16 | uint8_t* src, // Source image on device 17 | int src_line_size, // Number of bytes per source image row 18 | int src_width, // Source image width 19 | int src_height, // Source image height 20 | float* dst, // Destination image on device (output) 21 | int dst_width, // Destination image width 22 | int dst_height, // Destination image height 23 | uint8_t const_value_st, // Constant value for out-of-bound pixels 24 | AffineMatrix d2s, // Affine transformation matrix (destination to source) 25 | int edge // Total number of pixels to process 26 | ) { 27 | // Calculate the global position of the thread 28 | int position = blockDim.x * blockIdx.x + threadIdx.x; 29 | if (position >= edge) return; // Exit if position exceeds total pixels 30 | 31 | // Extract affine matrix elements 32 | float m_x1 = d2s.value[0]; 33 | float m_y1 = d2s.value[1]; 34 | float m_z1 = d2s.value[2]; 35 | float m_x2 = d2s.value[3]; 36 | float m_y2 = d2s.value[4]; 37 | float m_z2 = d2s.value[5]; 38 | 39 | // Calculate destination pixel coordinates 40 | int dx = position % dst_width; 41 | int dy = position / dst_width; 42 | 43 | // Apply affine transformation to get source coordinates 44 | float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f; 45 | float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f; 46 | 47 | float c0, c1, c2; // Color channels (B, G, R) 48 | 49 | // Check if the source coordinates are out of bounds 50 | if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) { 51 | // Assign constant value if out of range 52 | c0 = const_value_st; 53 | c1 = const_value_st; 54 | c2 = const_value_st; 55 | } 56 | else { 57 | // Perform bilinear interpolation 58 | 59 | // Get the integer parts of the source coordinates 60 | int y_low = floorf(src_y); 61 | int x_low = floorf(src_x); 62 | int y_high = y_low + 1; 63 | int x_high = x_low + 1; 64 | 65 | // Initialize constant values for out-of-bound pixels 66 | uint8_t const_value[] = { const_value_st, const_value_st, const_value_st }; 67 | 68 | // Calculate the fractional parts 69 | float ly = src_y - y_low; 70 | float lx = src_x - x_low; 71 | float hy = 1 - ly; 72 | float hx = 1 - lx; 73 | 74 | // Compute the weights for the four surrounding pixels 75 | float w1 = hy * hx; // Top-left 76 | float w2 = hy * lx; // Top-right 77 | float w3 = ly * hx; // Bottom-left 78 | float w4 = ly * lx; // Bottom-right 79 | 80 | // Initialize pointers to the four surrounding pixels 81 | uint8_t* v1 = const_value; 82 | uint8_t* v2 = const_value; 83 | uint8_t* v3 = const_value; 84 | uint8_t* v4 = const_value; 85 | 86 | // Top-left pixel 87 | if (y_low >= 0) { 88 | if (x_low >= 0) 89 | v1 = src + y_low * src_line_size + x_low * 3; 90 | // Top-right pixel 91 | if (x_high < src_width) 92 | v2 = src + y_low * src_line_size + x_high * 3; 93 | } 94 | 95 | // Bottom-left and Bottom-right pixels 96 | if (y_high < src_height) { 97 | if (x_low >= 0) 98 | v3 = src + y_high * src_line_size + x_low * 3; 99 | if (x_high < src_width) 100 | v4 = src + y_high * src_line_size + x_high * 3; 101 | } 102 | 103 | // Perform bilinear interpolation for each color channel 104 | c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0]; // Blue 105 | c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1]; // Green 106 | c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2]; // Red 107 | } 108 | 109 | // Convert from BGR to RGB by swapping channels 110 | float t = c2; 111 | c2 = c0; 112 | c0 = t; 113 | 114 | // Normalize the color values to [0, 1] 115 | c0 = c0 / 255.0f; 116 | c1 = c1 / 255.0f; 117 | c2 = c2 / 255.0f; 118 | 119 | // Rearrange the output format from interleaved RGB to separate channels 120 | int area = dst_width * dst_height; 121 | float* pdst_c0 = dst + dy * dst_width + dx; // Red channel 122 | float* pdst_c1 = pdst_c0 + area; // Green channel 123 | float* pdst_c2 = pdst_c1 + area; // Blue channel 124 | 125 | // Assign the normalized color values to the destination buffers 126 | *pdst_c0 = c0; 127 | *pdst_c1 = c1; 128 | *pdst_c2 = c2; 129 | } 130 | 131 | // Host function to perform CUDA-based preprocessing 132 | void cuda_preprocess( 133 | uint8_t* src, // Source image data on host 134 | int src_width, // Source image width 135 | int src_height, // Source image height 136 | float* dst, // Destination buffer on device 137 | int dst_width, // Destination image width 138 | int dst_height, // Destination image height 139 | cudaStream_t stream // CUDA stream for asynchronous execution 140 | ) { 141 | // Calculate the size of the image in bytes (3 channels: BGR) 142 | int img_size = src_width * src_height * 3; 143 | 144 | // Copy source image data to pinned host memory for faster transfer 145 | memcpy(img_buffer_host, src, img_size); 146 | 147 | // Asynchronously copy image data from host to device memory 148 | CUDA_CHECK(cudaMemcpyAsync( 149 | img_buffer_device, 150 | img_buffer_host, 151 | img_size, 152 | cudaMemcpyHostToDevice, 153 | stream 154 | )); 155 | 156 | // Define affine transformation matrices 157 | AffineMatrix s2d, d2s; // Source to destination and vice versa 158 | 159 | // Calculate the scaling factor to maintain aspect ratio 160 | float scale = std::min( 161 | dst_height / (float)src_height, 162 | dst_width / (float)src_width 163 | ); 164 | 165 | // Initialize source-to-destination affine matrix (s2d) 166 | s2d.value[0] = scale; // m00 167 | s2d.value[1] = 0; // m01 168 | s2d.value[2] = -scale * src_width * 0.5f + dst_width * 0.5f; // m02 169 | s2d.value[3] = 0; // m10 170 | s2d.value[4] = scale; // m11 171 | s2d.value[5] = -scale * src_height * 0.5f + dst_height * 0.5f; // m12 172 | 173 | // Create OpenCV matrices for affine transformation 174 | cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); 175 | cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); 176 | 177 | // Invert the source-to-destination matrix to get destination-to-source 178 | cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); 179 | 180 | // Copy the inverted matrix back to d2s 181 | memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value)); 182 | 183 | // Calculate the total number of pixels to process 184 | int jobs = dst_height * dst_width; 185 | 186 | // Define the number of threads per block 187 | int threads = 256; 188 | 189 | // Calculate the number of blocks needed 190 | int blocks = ceil(jobs / (float)threads); 191 | 192 | // Launch the warp affine kernel 193 | warpaffine_kernel << > > ( 194 | img_buffer_device, // Source image on device 195 | src_width * 3, // Source line size (bytes per row) 196 | src_width, // Source width 197 | src_height, // Source height 198 | dst, // Destination buffer on device 199 | dst_width, // Destination width 200 | dst_height, // Destination height 201 | 128, // Constant value for out-of-bounds (gray) 202 | d2s, // Destination to source affine matrix 203 | jobs // Total number of pixels 204 | ); 205 | 206 | // Optionally, you might want to check for kernel launch errors 207 | CUDA_CHECK(cudaGetLastError()); 208 | } 209 | 210 | // Initialize CUDA preprocessing by allocating memory 211 | void cuda_preprocess_init(int max_image_size) { 212 | // Allocate pinned (page-locked) memory on the host for faster transfers 213 | CUDA_CHECK(cudaMallocHost((void**)&img_buffer_host, max_image_size * 3)); 214 | 215 | // Allocate memory on the device (GPU) for the image 216 | CUDA_CHECK(cudaMalloc((void**)&img_buffer_device, max_image_size * 3)); 217 | } 218 | 219 | // Clean up and free allocated memory 220 | void cuda_preprocess_destroy() { 221 | // Free device memory 222 | CUDA_CHECK(cudaFree(img_buffer_device)); 223 | 224 | // Free pinned host memory 225 | CUDA_CHECK(cudaFreeHost(img_buffer_host)); 226 | } 227 | --------------------------------------------------------------------------------