├── LICENSE ├── MNIST ├── CMakeLists.txt ├── build.cu ├── cmake │ └── FindTensorRT.cmake ├── img │ ├── img0.png │ ├── img1.png │ ├── img2.png │ ├── img3.png │ ├── img4.png │ ├── img5.png │ ├── img6.png │ ├── img7.png │ ├── img8.png │ └── img9.png ├── runtime.cu ├── torch2onnx.py └── train.py ├── README.md ├── yolov8 ├── .vscode │ ├── c_cpp_properties.json │ ├── launch.json │ ├── settings.json │ └── tasks.json ├── CMakeLists.txt ├── build.cu ├── cmake │ └── FindTensorRT.cmake ├── img │ ├── img0.jpg │ └── img1.jpg ├── runtime.cu ├── yolov8_utils.cpp └── yolov8_utils.h └── yolov8_accelerate ├── .vscode ├── c_cpp_properties.json ├── launch.json ├── settings.json └── tasks.json ├── CMakeLists.txt ├── build.cu ├── calibrator.cpp ├── calibrator.h ├── cmake └── FindTensorRT.cmake ├── cuda_utils.h ├── int8calib.table ├── runtime.cu ├── yolov8_utils.cpp └── yolov8_utils.h /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Chen Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MNIST/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(MNIST LANGUAGES CXX CUDA) 3 | 4 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") 5 | find_package(TensorRT REQUIRED) 6 | find_package(OpenCV REQUIRED) 7 | 8 | add_executable(build 9 | build.cu 10 | ) 11 | target_link_libraries(build PUBLIC ${TensorRT_LIBRARIES}) 12 | target_include_directories(build PUBLIC ${TensorRT_INCLUDE_DIRS}) 13 | 14 | add_executable(runtime 15 | runtime.cu 16 | ) 17 | target_link_libraries(runtime PUBLIC ${TensorRT_LIBRARIES} ${OpenCV_LIBS}) 18 | target_include_directories(runtime PUBLIC ${TensorRT_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS}) 19 | -------------------------------------------------------------------------------- /MNIST/build.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace nvinfer1; 18 | 19 | // 以下示例捕获所有警告消息,但忽略信息性消息 20 | class Logger : public ILogger 21 | { 22 | void log(Severity severity, const char* msg) noexcept override 23 | { 24 | // 抑制信息级别的消息 25 | if (severity <= Severity::kWARNING) 26 | cout << msg << endl; 27 | } 28 | }; 29 | 30 | int main(int argc, char **argv) 31 | { 32 | // 实例化ILogger 33 | Logger logger; 34 | 35 | // 创建builder 36 | auto builder = unique_ptr(createInferBuilder(logger)); 37 | 38 | // 创建网络(显性batch) 39 | uint32_t 
flag = 1U < 40 | (NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 41 | auto network = unique_ptr(builder->createNetworkV2(flag)); 42 | 43 | // 创建ONNX解析器:parser 44 | auto parser = unique_ptr(nvonnxparser::createParser(*network, logger)); 45 | // 读取文件 46 | char *file_path = "MNIST.onnx"; 47 | parser->parseFromFile(file_path, static_cast(ILogger::Severity::kWARNING)); 48 | 49 | // 创建构建配置,用来指定trt如何优化模型 50 | auto config = unique_ptr(builder->createBuilderConfig()); 51 | // 设定配置 52 | // 工作空间大小 53 | config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1U << 20); 54 | // 设置精度 55 | config->setFlag(nvinfer1::BuilderFlag::kFP16); 56 | 57 | // 创建引擎 58 | auto engine = unique_ptr(builder->buildSerializedNetwork(*network, *config)); 59 | 60 | //序列化保存engine 61 | ofstream engine_file("./MNIST.engine", ios::binary); 62 | assert(engine_file.is_open() && "Failed to open engine file"); 63 | engine_file.write((char *)engine->data(), engine->size()); 64 | engine_file.close(); 65 | 66 | cout << "Engine build success!" 
<< endl; 67 | return 0; 68 | } -------------------------------------------------------------------------------- /MNIST/cmake/FindTensorRT.cmake: -------------------------------------------------------------------------------- 1 | ## find tensorrt 2 | include(FindPackageHandleStandardArgs) 3 | 4 | ## 用户可以输入的TensorRT 搜索路径 5 | set(TensorRT_ROOT 6 | "" 7 | CACHE 8 | PATH 9 | "TensorRT root directory") 10 | 11 | ## 设置TensorRT 搜索路径 12 | set(TensorRT_SEARCH_PATH 13 | /usr/include/x86_64-linux-gnu 14 | /usr/src/tensorrt 15 | /usr/lib/x86_64-linux-gnu 16 | ${TensorRT_ROOT} 17 | ) 18 | 19 | ## 设置需要搜索的TensorRT 依赖库 20 | set(TensorRT_ALL_LIBS 21 | nvinfer 22 | nvinfer_plugin 23 | nvparsers 24 | nvonnxparser 25 | ) 26 | 27 | ## 提前设置后面需要用的变量 28 | set(TensorRT_LIBS_LIST) 29 | set(TensorRT_LIBRARIES) 30 | 31 | ## 搜索头文件的路径 32 | find_path( 33 | TensorRT_INCLUDE_DIR 34 | NAMES NvInfer.h 35 | PATHS ${TensorRT_SEARCH_PATH} 36 | ) 37 | 38 | ## 利用头文件路径下的version文件来设置TensorRT的版本信息 39 | if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") 40 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") 41 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") 42 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") 43 | 44 | string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") 45 | string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") 46 | string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") 47 | set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") 48 | endif() 49 | message("TensorRT version: ${TensorRT_VERSION_STRING}") 50 | 51 | ## 搜索sample code的路径 52 | 
find_path( 53 | TensorRT_SAMPLE_DIR 54 | NAMES trtexec/trtexec.cpp 55 | PATHS ${TensorRT_SEARCH_PATH} 56 | PATH_SUFFIXES samples 57 | ) 58 | 59 | ## 依次搜索TensorRT依赖库 60 | foreach(lib ${TensorRT_ALL_LIBS} ) 61 | find_library( 62 | TensorRT_${lib}_LIBRARY 63 | NAMES ${lib} 64 | PATHS ${TensorRT_SEARCH_PATH} 65 | ) 66 | ## 存储TensorRT的依赖库变量 67 | set(TensorRT_LIBS_VARS TensorRT_${lib}_LIBRARY ${TensorRT_LIBS_LIST}) 68 | ## 也是TensorRT的依赖库,存成list,方便后面用foreach 69 | list(APPEND TensorRT_LIBS_LIST TensorRT_${lib}_LIBRARY) 70 | endforeach() 71 | 72 | ## 调用cmake内置功能,设置基础变量如xxx_FOUND 73 | find_package_handle_standard_args(TensorRT REQUIRED_VARS TensorRT_INCLUDE_DIR TensorRT_SAMPLE_DIR ${TensorRT_LIBS_VARS}) 74 | 75 | if(TensorRT_FOUND) 76 | ## 设置Tensor_LIBRARIES变量 77 | foreach(lib ${TensorRT_LIBS_LIST} ) 78 | list(APPEND TensorRT_LIBRARIES ${${lib}}) 79 | endforeach() 80 | message("Found TensorRT: ${TensorRT_INCLUDE_DIR} ${TensorRT_LIBRARIES} ${TensorRT_SAMPLE_DIR}") 81 | message("TensorRT version: ${TensorRT_VERSION_STRING}") 82 | endif() -------------------------------------------------------------------------------- /MNIST/img/img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img0.png -------------------------------------------------------------------------------- /MNIST/img/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img1.png -------------------------------------------------------------------------------- /MNIST/img/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img2.png 
-------------------------------------------------------------------------------- /MNIST/img/img3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img3.png -------------------------------------------------------------------------------- /MNIST/img/img4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img4.png -------------------------------------------------------------------------------- /MNIST/img/img5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img5.png -------------------------------------------------------------------------------- /MNIST/img/img6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img6.png -------------------------------------------------------------------------------- /MNIST/img/img7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img7.png -------------------------------------------------------------------------------- /MNIST/img/img8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img8.png -------------------------------------------------------------------------------- /MNIST/img/img9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/MNIST/img/img9.png -------------------------------------------------------------------------------- /MNIST/runtime.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace nvinfer1; 20 | 21 | // 以下示例捕获所有警告消息,但忽略信息性消息 22 | class Logger : public ILogger 23 | { 24 | void log(Severity severity, const char* msg) noexcept override 25 | { 26 | // 抑制信息级别的消息 27 | if (severity <= Severity::kWARNING) 28 | cout << msg << endl; 29 | } 30 | }; 31 | 32 | // 加载模型文件 33 | std::vector load_engine_file(const std::string &file_name) 34 | { 35 | std::vector engine_data; 36 | std::ifstream engine_file(file_name, std::ios::binary); 37 | assert(engine_file.is_open() && "Unable to load engine file."); 38 | engine_file.seekg(0, engine_file.end); 39 | int length = engine_file.tellg(); 40 | engine_data.resize(length); 41 | engine_file.seekg(0, engine_file.beg); 42 | engine_file.read(reinterpret_cast(engine_data.data()), length); 43 | return engine_data; 44 | } 45 | 46 | int softmax(const float(&rst)[10]){ 47 | float cache = 0; 48 | int idx = 0; 49 | for(int i = 0; i < 10; i += 1) 50 | { 51 | if(rst[i]>cache) 52 | { 53 | cache = rst[i]; 54 | idx = i; 55 | }; 56 | }; 57 | return idx; 58 | } 59 | 60 | int main(int argc, char **argv) 61 | { 62 | // 实例化ILogger 63 | Logger logger; 64 | 65 | // 创建runtime 66 | auto runtime = unique_ptr(createInferRuntime(logger)); 67 | 68 | // 读取engine,反序列化 69 | string file_path = "MNIST.engine"; 70 | auto plan = load_engine_file(file_path); 71 | auto engine = 
shared_ptr(runtime->deserializeCudaEngine(plan.data(), plan.size())); 72 | 73 | // 创建执行上下文 74 | auto context = unique_ptr(engine->createExecutionContext()); 75 | 76 | auto idims = engine->getTensorShape("input.1");// 这里的名字可以在导出时修改 77 | auto odims = engine->getTensorShape("23"); 78 | Dims4 inputDims = { 1, idims.d[1], idims.d[2], idims.d[3] }; 79 | Dims2 outputDims = { 1, 10 }; 80 | context->setInputShape("input.1", inputDims); 81 | 82 | void* buffers[2]; 83 | const int inputIndex = 0; 84 | const int outputIndex = 1; 85 | 86 | cudaMalloc(&buffers[inputIndex], 1 * 28 * 28 * sizeof(float)); 87 | cudaMalloc(&buffers[outputIndex], 10 * sizeof(float)); 88 | 89 | // 设定数据地址 90 | context->setTensorAddress("input.1", buffers[inputIndex]); 91 | context->setTensorAddress("23", buffers[outputIndex]); 92 | 93 | // 创建cuda流 94 | cudaStream_t stream; 95 | cudaStreamCreate(&stream); 96 | 97 | // 读取文件执行推理 98 | for(int i = 0; i < 10; i += 1) 99 | { 100 | // 读取图片 101 | cv::Mat img0; 102 | std::string file_name = "img/img" + std::to_string(i) + ".png"; 103 | img0 = cv::imread(file_name, 0);// 0为灰度图片 104 | if (img0.empty()) //检测image有无数据,无数据 image.empty()返回 真 105 | { 106 | std::cout << "Could not open or find the image" << std::endl; 107 | return -1; 108 | } 109 | cv::Mat img; 110 | img0.convertTo(img, CV_32F); 111 | // cv::imshow(file_name,img); 112 | // cv::waitKey(0); 113 | 114 | // 将图像拷贝到GPU 115 | cudaMemcpyAsync(buffers[inputIndex], img.data,1 * 28 * 28 * sizeof(float), cudaMemcpyHostToDevice, stream); 116 | 117 | //执行推理 118 | context->enqueueV3(stream); 119 | cudaStreamSynchronize(stream); 120 | 121 | float rst[10]; 122 | cudaMemcpyAsync(&rst, buffers[outputIndex], 1 * 10 * sizeof(float), cudaMemcpyDeviceToHost, stream); 123 | 124 | cout << file_name << " 推理结果: " << softmax(rst) < 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 
using namespace nvinfer1; 18 | 19 | // 以下示例捕获所有警告消息,但忽略信息性消息 20 | class Logger : public ILogger 21 | { 22 | void log(Severity severity, const char* msg) noexcept override 23 | { 24 | // 抑制信息级别的消息 25 | if (severity <= Severity::kWARNING) 26 | cout << msg << endl; 27 | } 28 | }; 29 | 30 | int main(int argc, char **argv) 31 | { 32 | // 实例化ILogger 33 | Logger logger; 34 | 35 | // 创建builder 36 | auto builder = unique_ptr(createInferBuilder(logger)); 37 | 38 | // 创建网络(显性batch) 39 | uint32_t flag = 1U < 40 | (NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 41 | auto network = unique_ptr(builder->createNetworkV2(flag)); 42 | 43 | // 创建ONNX解析器:parser 44 | auto parser = unique_ptr(nvonnxparser::createParser(*network, logger)); 45 | // 读取文件 46 | char *file_path = "yolov8s.onnx"; 47 | parser->parseFromFile(file_path, static_cast(ILogger::Severity::kWARNING)); 48 | 49 | // 创建构建配置,用来指定trt如何优化模型 50 | auto config = unique_ptr(builder->createBuilderConfig()); 51 | // 设定配置 52 | // 工作空间大小 53 | config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1U << 20); 54 | // 设置精度 55 | config->setFlag(nvinfer1::BuilderFlag::kFP16); 56 | 57 | // 创建引擎 58 | auto engine = unique_ptr(builder->buildSerializedNetwork(*network, *config)); 59 | 60 | //序列化保存engine 61 | ofstream engine_file("./yolov8s.engine", ios::binary); 62 | assert(engine_file.is_open() && "Failed to open engine file"); 63 | engine_file.write((char *)engine->data(), engine->size()); 64 | engine_file.close(); 65 | 66 | cout << "Engine build success!" 
<< endl; 67 | return 0; 68 | } -------------------------------------------------------------------------------- /yolov8/cmake/FindTensorRT.cmake: -------------------------------------------------------------------------------- 1 | ## find tensorrt 2 | include(FindPackageHandleStandardArgs) 3 | 4 | ## 用户可以输入的TensorRT 搜索路径 5 | set(TensorRT_ROOT 6 | "" 7 | CACHE 8 | PATH 9 | "TensorRT root directory") 10 | 11 | ## 设置TensorRT 搜索路径 12 | set(TensorRT_SEARCH_PATH 13 | /usr/include/x86_64-linux-gnu 14 | /usr/src/tensorrt 15 | /usr/lib/x86_64-linux-gnu 16 | ${TensorRT_ROOT} 17 | ) 18 | 19 | ## 设置需要搜索的TensorRT 依赖库 20 | set(TensorRT_ALL_LIBS 21 | nvinfer 22 | nvinfer_plugin 23 | nvparsers 24 | nvonnxparser 25 | ) 26 | 27 | ## 提前设置后面需要用的变量 28 | set(TensorRT_LIBS_LIST) 29 | set(TensorRT_LIBRARIES) 30 | 31 | ## 搜索头文件的路径 32 | find_path( 33 | TensorRT_INCLUDE_DIR 34 | NAMES NvInfer.h 35 | PATHS ${TensorRT_SEARCH_PATH} 36 | ) 37 | 38 | ## 利用头文件路径下的version文件来设置TensorRT的版本信息 39 | if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") 40 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") 41 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") 42 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") 43 | 44 | string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") 45 | string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") 46 | string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") 47 | set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") 48 | endif() 49 | message("TensorRT version: ${TensorRT_VERSION_STRING}") 50 | 51 | ## 搜索sample code的路径 52 | 
find_path( 53 | TensorRT_SAMPLE_DIR 54 | NAMES trtexec/trtexec.cpp 55 | PATHS ${TensorRT_SEARCH_PATH} 56 | PATH_SUFFIXES samples 57 | ) 58 | 59 | ## 依次搜索TensorRT依赖库 60 | foreach(lib ${TensorRT_ALL_LIBS} ) 61 | find_library( 62 | TensorRT_${lib}_LIBRARY 63 | NAMES ${lib} 64 | PATHS ${TensorRT_SEARCH_PATH} 65 | ) 66 | ## 存储TensorRT的依赖库变量 67 | set(TensorRT_LIBS_VARS TensorRT_${lib}_LIBRARY ${TensorRT_LIBS_LIST}) 68 | ## 也是TensorRT的依赖库,存成list,方便后面用foreach 69 | list(APPEND TensorRT_LIBS_LIST TensorRT_${lib}_LIBRARY) 70 | endforeach() 71 | 72 | ## 调用cmake内置功能,设置基础变量如xxx_FOUND 73 | find_package_handle_standard_args(TensorRT REQUIRED_VARS TensorRT_INCLUDE_DIR TensorRT_SAMPLE_DIR ${TensorRT_LIBS_VARS}) 74 | 75 | if(TensorRT_FOUND) 76 | ## 设置Tensor_LIBRARIES变量 77 | foreach(lib ${TensorRT_LIBS_LIST} ) 78 | list(APPEND TensorRT_LIBRARIES ${${lib}}) 79 | endforeach() 80 | message("Found TensorRT: ${TensorRT_INCLUDE_DIR} ${TensorRT_LIBRARIES} ${TensorRT_SAMPLE_DIR}") 81 | message("TensorRT version: ${TensorRT_VERSION_STRING}") 82 | endif() -------------------------------------------------------------------------------- /yolov8/img/img0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/yolov8/img/img0.jpg -------------------------------------------------------------------------------- /yolov8/img/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyberyang123/Learning-TensorRT/bcd32dbe464df0818ab86e9481568f2b45b90547/yolov8/img/img1.jpg -------------------------------------------------------------------------------- /yolov8/runtime.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 
#include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "yolov8_utils.h" 19 | 20 | 21 | // 以下示例捕获所有警告消息,但忽略信息性消息 22 | class Logger : public nvinfer1::ILogger 23 | { 24 | void log(Severity severity, const char* msg) noexcept override 25 | { 26 | // 抑制信息级别的消息 27 | if (severity <= Severity::kWARNING) 28 | std::cout << msg << std::endl; 29 | } 30 | }; 31 | 32 | // 加载模型文件 33 | std::vector load_engine_file(const std::string &file_name) 34 | { 35 | std::vector engine_data; 36 | std::ifstream engine_file(file_name, std::ios::binary); 37 | assert(engine_file.is_open() && "Unable to load engine file."); 38 | engine_file.seekg(0, engine_file.end); 39 | int length = engine_file.tellg(); 40 | engine_data.resize(length); 41 | engine_file.seekg(0, engine_file.beg); 42 | engine_file.read(reinterpret_cast(engine_data.data()), length); 43 | return engine_data; 44 | } 45 | 46 | 47 | int main(int argc, char **argv) 48 | { 49 | // 实例化ILogger 50 | Logger logger; 51 | 52 | std::unique_ptr runtime = std::unique_ptr(nvinfer1::createInferRuntime(logger)); 53 | if (runtime == nullptr){return false;} 54 | 55 | std::string file_path = "yolov8s.engine"; 56 | auto plan = load_engine_file(file_path); 57 | 58 | auto engine = std::unique_ptr(runtime->deserializeCudaEngine(plan.data(), plan.size())); 59 | if (engine == nullptr){return false;} 60 | 61 | auto context = std::unique_ptr(engine->createExecutionContext()); 62 | if (context == nullptr){return false;} 63 | 64 | auto idims = engine->getTensorShape("images"); 65 | auto odims = engine->getTensorShape("output0"); 66 | nvinfer1::Dims4 inputDims = { idims.d[0], idims.d[1], idims.d[2], idims.d[3] }; 67 | nvinfer1::Dims3 outputDims = { idims.d[0], idims.d[1], idims.d[2] }; 68 | context->setInputShape("images", inputDims); 69 | 70 | void* buffers[2]; 71 | const int inputIndex = 0; 72 | const int outputIndex = 1; 73 | 74 | cudaMalloc(&buffers[inputIndex], idims.d[0] * idims.d[1] * idims.d[2] * idims.d[3] * sizeof(float)); 75 | 
cudaMalloc(&buffers[outputIndex], odims.d[0] * odims.d[1] * odims.d[2] * sizeof(float)); 76 | 77 | // 设定数据地址 78 | context->setTensorAddress("images", buffers[inputIndex]); 79 | context->setTensorAddress("output0", buffers[outputIndex]); 80 | 81 | // 创建cuda流 82 | cudaStream_t stream; 83 | cudaStreamCreate(&stream); 84 | 85 | // 读取文件执行推理 86 | for(int i = 0; i < 2; i += 1) 87 | { 88 | // 读取图片 89 | cv::Mat img; 90 | std::string file_name = "img/img" + std::to_string(i) + ".jpg"; 91 | img = cv::imread(file_name); 92 | if (img.empty()) //检测image有无数据,无数据 image.empty()返回 真 93 | { 94 | std::cout << "Could not open or find the image" << std::endl; 95 | return -1; 96 | } 97 | 98 | cv::Mat LetterBoxImg; 99 | cv::Vec4d params; 100 | LetterBox(img, LetterBoxImg, params, cv::Size(640, 640)); 101 | 102 | cv::Mat blob; 103 | cv::dnn::blobFromImage(img, blob, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true, false, CV_32F); 104 | 105 | // 将图像拷贝到GPU 106 | cudaMemcpyAsync(buffers[inputIndex], blob.data,3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice, stream); 107 | 108 | //执行推理 109 | if(context->enqueueV3(stream)){ 110 | std::cout << "enqueued successfully!" 
<< std::endl; 111 | } 112 | cudaStreamSynchronize(stream); 113 | 114 | float rst[1][84][8400]; 115 | cudaMemcpyAsync(&rst, buffers[outputIndex], 1 * 84 * 8400 * sizeof(float), cudaMemcpyDeviceToHost, stream); 116 | 117 | postprocess(rst, img, params); 118 | } 119 | 120 | cudaStreamDestroy(stream); 121 | cudaFree(buffers[inputIndex]); 122 | cudaFree(buffers[outputIndex]); 123 | } -------------------------------------------------------------------------------- /yolov8/yolov8_utils.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "yolov8_utils.h" 3 | 4 | using namespace cv; 5 | using namespace std; 6 | 7 | void LetterBox(const cv::Mat& image, cv::Mat& outImage, cv::Vec4d& params, const cv::Size& newShape, 8 | bool autoShape, bool scaleFill, bool scaleUp, int stride, const cv::Scalar& color) 9 | { 10 | if (false) { 11 | int maxLen = MAX(image.rows, image.cols); 12 | outImage = Mat::zeros(Size(maxLen, maxLen), CV_8UC3); 13 | image.copyTo(outImage(Rect(0, 0, image.cols, image.rows))); 14 | params[0] = 1; 15 | params[1] = 1; 16 | params[3] = 0; 17 | params[2] = 0; 18 | } 19 | 20 | cv::Size shape = image.size(); 21 | float r = std::min((float)newShape.height / (float)shape.height, 22 | (float)newShape.width / (float)shape.width); 23 | if (!scaleUp) 24 | r = std::min(r, 1.0f); 25 | 26 | float ratio[2]{ r, r }; 27 | int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) }; 28 | 29 | auto dw = (float)(newShape.width - new_un_pad[0]); 30 | auto dh = (float)(newShape.height - new_un_pad[1]); 31 | 32 | if (autoShape) 33 | { 34 | dw = (float)((int)dw % stride); 35 | dh = (float)((int)dh % stride); 36 | } 37 | else if (scaleFill) 38 | { 39 | dw = 0.0f; 40 | dh = 0.0f; 41 | new_un_pad[0] = newShape.width; 42 | new_un_pad[1] = newShape.height; 43 | ratio[0] = (float)newShape.width / (float)shape.width; 44 | ratio[1] = (float)newShape.height / (float)shape.height; 45 | } 
46 | 47 | dw /= 2.0f; 48 | dh /= 2.0f; 49 | 50 | if (shape.width != new_un_pad[0] && shape.height != new_un_pad[1]) 51 | { 52 | cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1])); 53 | } 54 | else { 55 | outImage = image.clone(); 56 | } 57 | 58 | int top = int(std::round(dh - 0.1f)); 59 | int bottom = int(std::round(dh + 0.1f)); 60 | int left = int(std::round(dw - 0.1f)); 61 | int right = int(std::round(dw + 0.1f)); 62 | params[0] = ratio[0]; 63 | params[1] = ratio[1]; 64 | params[2] = left; 65 | params[3] = top; 66 | cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); 67 | } 68 | 69 | void postprocess(float (&rst)[1][84][8400], cv::Mat &img, cv::Vec4d params) 70 | { 71 | std::vector boxes; 72 | std::vector scores; 73 | std::vector det_rst; 74 | static const float score_threshold = 0.6; 75 | static const float nms_threshold = 0.45; 76 | std::vector indices; 77 | 78 | for(int Anchors=0 ;Anchors < 8400; Anchors++) 79 | { 80 | float max_score = 0.0; 81 | int max_score_det = 99; 82 | float pdata[4]; 83 | for(int prob = 4; prob < 84; prob++) 84 | { 85 | if(rst[0][prob][Anchors] > max_score){ 86 | max_score = rst[0][prob][Anchors]; 87 | max_score_det = prob - 4; 88 | pdata[0] = rst[0][0][Anchors]; 89 | pdata[1] = rst[0][1][Anchors]; 90 | pdata[2] = rst[0][2][Anchors]; 91 | pdata[3] = rst[0][3][Anchors]; 92 | } 93 | } 94 | if(max_score >= score_threshold) 95 | { 96 | float x = (pdata[0] - params[2]) / params[0]; 97 | float y = (pdata[1] - params[3]) / params[1]; 98 | float w = pdata[2] / params[0]; 99 | float h = pdata[3] / params[1]; 100 | int left = MAX(int(x - 0.5 * w + 0.5), 0); 101 | int top = MAX(int(y - 0.5 * h + 0.5), 0); 102 | boxes.push_back(Rect(left, top, int(w + 0.5), int(h + 0.5))); 103 | scores.emplace_back(max_score); 104 | det_rst.emplace_back(max_score_det); 105 | } 106 | } 107 | 108 | cv::dnn::NMSBoxes(boxes, scores, score_threshold, nms_threshold, indices); 109 | 110 | for (int i = 0; i < 
indices.size(); i++) { 111 | std::cout << boxes[indices[i]] << std::endl; 112 | cv::rectangle(img, boxes[indices[i]], Scalar(255, 0, 0), 2, LINE_8,0); 113 | } 114 | 115 | cv::imshow("rst",img); 116 | cv::waitKey(0); 117 | } -------------------------------------------------------------------------------- /yolov8/yolov8_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | void LetterBox(const cv::Mat& image, cv::Mat& outImage, 7 | cv::Vec4d& params, //[ratio_x,ratio_y,dw,dh] 8 | const cv::Size& newShape = cv::Size(640, 640), 9 | bool autoShape = false, 10 | bool scaleFill = false, 11 | bool scaleUp = true, 12 | int stride = 32, 13 | const cv::Scalar& color = cv::Scalar(0, 0, 0) 14 | ); 15 | 16 | void postprocess(float (&rst)[1][84][8400], cv::Mat &img, cv::Vec4d params); -------------------------------------------------------------------------------- /yolov8_accelerate/.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "/usr/include/opencv4" 8 | ], 9 | "defines": [], 10 | "compilerPath": "/usr/bin/gcc", 11 | "cStandard": "c17", 12 | "cppStandard": "gnu++14", 13 | "intelliSenseMode": "linux-gcc-x64", 14 | "configurationProvider": "ms-vscode.cmake-tools" 15 | } 16 | ], 17 | "version": 4 18 | } -------------------------------------------------------------------------------- /yolov8_accelerate/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "cmake - 生成和调试活动文件", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": 
"${fileDirname}/build/${fileBasenameNoExtension}", 12 | "args": [], 13 | "stopAtEntry": false, 14 | "cwd": "${fileDirname}", 15 | "environment": [], 16 | "externalConsole": false, 17 | "MIMode": "gdb", 18 | "setupCommands": [ 19 | { 20 | "description": "为 gdb 启用整齐打印", 21 | "text": "-enable-pretty-printing", 22 | "ignoreFailures": true 23 | }, 24 | { 25 | "description": "将反汇编风格设置为 Intel", 26 | "text": "-gdb-set disassembly-flavor intel", 27 | "ignoreFailures": true 28 | } 29 | ], 30 | "preLaunchTask": "CMake: 构建", 31 | "miDebuggerPath": "/usr/bin/gdb" 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /yolov8_accelerate/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.py": "python", 4 | "iosfwd": "cpp", 5 | "*.tcc": "cpp", 6 | "cctype": "cpp", 7 | "clocale": "cpp", 8 | "cmath": "cpp", 9 | "cstdarg": "cpp", 10 | "cstddef": "cpp", 11 | "cstdio": "cpp", 12 | "cstdlib": "cpp", 13 | "cstring": "cpp", 14 | "ctime": "cpp", 15 | "cwchar": "cpp", 16 | "cwctype": "cpp", 17 | "array": "cpp", 18 | "atomic": "cpp", 19 | "bitset": "cpp", 20 | "chrono": "cpp", 21 | "complex": "cpp", 22 | "condition_variable": "cpp", 23 | "cstdint": "cpp", 24 | "deque": "cpp", 25 | "list": "cpp", 26 | "unordered_map": "cpp", 27 | "vector": "cpp", 28 | "exception": "cpp", 29 | "algorithm": "cpp", 30 | "functional": "cpp", 31 | "iterator": "cpp", 32 | "map": "cpp", 33 | "memory": "cpp", 34 | "memory_resource": "cpp", 35 | "numeric": "cpp", 36 | "optional": "cpp", 37 | "random": "cpp", 38 | "ratio": "cpp", 39 | "set": "cpp", 40 | "string": "cpp", 41 | "string_view": "cpp", 42 | "system_error": "cpp", 43 | "tuple": "cpp", 44 | "type_traits": "cpp", 45 | "utility": "cpp", 46 | "fstream": "cpp", 47 | "initializer_list": "cpp", 48 | "iomanip": "cpp", 49 | "iostream": "cpp", 50 | "istream": "cpp", 51 | "limits": "cpp", 52 | "mutex": "cpp", 53 | "new": "cpp", 54 
| "ostream": "cpp", 55 | "sstream": "cpp", 56 | "stdexcept": "cpp", 57 | "streambuf": "cpp", 58 | "thread": "cpp", 59 | "cinttypes": "cpp", 60 | "typeindex": "cpp", 61 | "typeinfo": "cpp", 62 | "variant": "cpp", 63 | "bit": "cpp" 64 | }, 65 | "cmake.configureOnOpen": false 66 | } -------------------------------------------------------------------------------- /yolov8_accelerate/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | // cmake 配置 5 | { 6 | "type": "cppbuild", 7 | "label": "CMake: 配置", 8 | "command": "cmake", 9 | "args": [ 10 | "-DCMAKE_BUILD_TYPE=debug", 11 | "-S", 12 | ".", 13 | "-B", 14 | "build" 15 | ], 16 | "options": { 17 | "cwd" : "${workspaceFolder}" 18 | }, 19 | "group": { 20 | "kind": "build", 21 | }, 22 | }, 23 | 24 | // cmake 构建 25 | { 26 | "type": "cppbuild", 27 | "label": "CMake: 构建", 28 | "command": "cmake", 29 | "args": [ 30 | "--build", 31 | "build" 32 | ], 33 | "options": { 34 | "cwd" : "${workspaceFolder}" 35 | }, 36 | "dependsOn" : ["CMake: 配置"], 37 | "group": { 38 | "kind": "build", 39 | "isDefault": true 40 | }, 41 | } 42 | ] 43 | } -------------------------------------------------------------------------------- /yolov8_accelerate/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(yolov8 LANGUAGES CXX CUDA) 3 | 4 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") 5 | find_package(TensorRT REQUIRED) 6 | find_package(OpenCV REQUIRED) 7 | 8 | add_executable(build 9 | build.cu 10 | calibrator.cpp 11 | yolov8_utils.cpp 12 | ) 13 | target_link_libraries(build PUBLIC ${PROJECT_BINARY_DIR} ${TensorRT_LIBRARIES} ${OpenCV_LIBS}) 14 | target_include_directories(build PUBLIC ${PROJECT_BINARY_DIR} ${TensorRT_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS}) 15 | 16 | add_executable(runtime 17 | runtime.cu 18 | yolov8_utils.cpp 19 | ) 20 | 
target_link_libraries(runtime PUBLIC ${PROJECT_BINARY_DIR} ${TensorRT_LIBRARIES} ${OpenCV_LIBS}) 21 | target_include_directories(runtime PUBLIC ${PROJECT_BINARY_DIR} ${TensorRT_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS}) -------------------------------------------------------------------------------- /yolov8_accelerate/build.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "yolov8_utils.h" 17 | #include "calibrator.h" 18 | 19 | using namespace std; 20 | using namespace nvinfer1; 21 | 22 | // 以下示例捕获所有警告消息,但忽略信息性消息 23 | class Logger : public ILogger 24 | { 25 | void log(Severity severity, const char* msg) noexcept override 26 | { 27 | // 抑制信息级别的消息 28 | if (severity <= Severity::kWARNING) 29 | cout << msg << endl; 30 | } 31 | }; 32 | 33 | int main(int argc, char **argv) 34 | { 35 | // 实例化ILogger 36 | Logger logger; 37 | 38 | // 创建builder 39 | auto builder = unique_ptr(createInferBuilder(logger)); 40 | 41 | // 创建网络(显性batch) 42 | uint32_t flag = 1U < 43 | (NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 44 | auto network = unique_ptr(builder->createNetworkV2(flag)); 45 | 46 | // 创建ONNX解析器:parser 47 | auto parser = unique_ptr(nvonnxparser::createParser(*network, logger)); 48 | // 读取文件 49 | char *file_path = "yolov8x.onnx"; 50 | parser->parseFromFile(file_path, static_cast(ILogger::Severity::kWARNING)); 51 | 52 | // 创建构建配置,用来指定trt如何优化模型 53 | auto config = unique_ptr(builder->createBuilderConfig()); 54 | // 设定配置 55 | // 工作空间大小 56 | config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1U << 20); 57 | // 设置精度 58 | config->setFlag(nvinfer1::BuilderFlag::kINT8); 59 | config->setFlag(nvinfer1::BuilderFlag::kFP16); 60 | auto* calibrator = new Int8Calibrator(1, 640, 640, "./coco_calib/", "int8calib.table", ""); 61 | config->setInt8Calibrator(calibrator); 
62 | 63 | // 创建引擎 64 | auto engine = unique_ptr(builder->buildSerializedNetwork(*network, *config)); 65 | 66 | //序列化保存engine 67 | ofstream engine_file("./yolov8x.engine", ios::binary); 68 | assert(engine_file.is_open() && "Failed to open engine file"); 69 | engine_file.write((char *)engine->data(), engine->size()); 70 | engine_file.close(); 71 | 72 | cout << "Engine build success!" << endl; 73 | return 0; 74 | } -------------------------------------------------------------------------------- /yolov8_accelerate/calibrator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "calibrator.h" 7 | #include "yolov8_utils.h" 8 | #include "cuda_utils.h" 9 | 10 | static inline int read_files_in_dir(const char *p_dir_name, std::vector &file_names) { 11 | DIR *p_dir = opendir(p_dir_name); 12 | if (p_dir == nullptr) { 13 | return -1; 14 | } 15 | 16 | struct dirent* p_file = nullptr; 17 | while ((p_file = readdir(p_dir)) != nullptr) { 18 | if (strcmp(p_file->d_name, ".") != 0 && 19 | strcmp(p_file->d_name, "..") != 0) { 20 | //std::string cur_file_name(p_dir_name); 21 | //cur_file_name += "/"; 22 | //cur_file_name += p_file->d_name; 23 | std::string cur_file_name(p_file->d_name); 24 | file_names.push_back(cur_file_name); 25 | } 26 | } 27 | 28 | closedir(p_dir); 29 | return 0; 30 | } 31 | 32 | Int8Calibrator::Int8Calibrator(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, 33 | const char* input_blob_name, bool read_cache) 34 | : batchsize_(batchsize) 35 | , input_w_(input_w) 36 | , input_h_(input_h) 37 | , img_idx_(0) 38 | , img_dir_(img_dir) 39 | , calib_table_name_(calib_table_name) 40 | , input_blob_name_(input_blob_name) 41 | , read_cache_(read_cache) 42 | { 43 | input_count_ = 3 * input_w * input_h * batchsize; 44 | CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float))); 45 | read_files_in_dir(img_dir, 
img_files_); 46 | } 47 | 48 | Int8Calibrator::~Int8Calibrator() 49 | { 50 | CUDA_CHECK(cudaFree(device_input_)); 51 | } 52 | 53 | int Int8Calibrator::getBatchSize() const noexcept 54 | { 55 | return batchsize_; 56 | } 57 | 58 | bool Int8Calibrator::getBatch(void* bindings[], const char* names[], int nbBindings) noexcept 59 | { 60 | if (img_idx_ + batchsize_ > (int)img_files_.size()) { 61 | return false; 62 | } 63 | 64 | std::vector input_imgs_; 65 | for (int i = img_idx_; i < img_idx_ + batchsize_; i++) { 66 | std::cout << img_files_[i] << " " << i << std::endl; 67 | cv::Mat temp = cv::imread(img_dir_ + img_files_[i]); 68 | if (temp.empty()){ 69 | std::cerr << "Fatal error: image cannot open!" << std::endl; 70 | return false; 71 | } 72 | cv::Mat LetterBoxImg; 73 | cv::Vec4d params; 74 | LetterBox(temp, LetterBoxImg, params, cv::Size(640, 640)); 75 | input_imgs_.push_back(LetterBoxImg); 76 | } 77 | img_idx_ += batchsize_; 78 | cv::Mat blob = cv::dnn::blobFromImage(input_imgs_[0], 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true, false, CV_32F); 79 | CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); 80 | // assert(!strcmp(names[0], input_blob_name_)); 81 | bindings[0] = device_input_; 82 | return true; 83 | } 84 | 85 | const void* Int8Calibrator::readCalibrationCache(size_t& length) noexcept 86 | { 87 | std::cout << "reading calib cache: " << calib_table_name_ << std::endl; 88 | calib_cache_.clear(); 89 | std::ifstream input(calib_table_name_, std::ios::binary); 90 | input >> std::noskipws; 91 | if (read_cache_ && input.good()) 92 | { 93 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(calib_cache_)); 94 | } 95 | length = calib_cache_.size(); 96 | return length ? 
calib_cache_.data() : nullptr; 97 | } 98 | 99 | void Int8Calibrator::writeCalibrationCache(const void* cache, size_t length) noexcept 100 | { 101 | std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl; 102 | std::ofstream output(calib_table_name_, std::ios::binary); 103 | output.write(reinterpret_cast(cache), length); 104 | } 105 | -------------------------------------------------------------------------------- /yolov8_accelerate/calibrator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class Int8Calibrator : public nvinfer1::IInt8MinMaxCalibrator 8 | { 9 | public: 10 | Int8Calibrator(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = false); 11 | virtual ~Int8Calibrator(); 12 | int getBatchSize() const noexcept override; 13 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 14 | const void* readCalibrationCache(size_t& length) noexcept override; 15 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 16 | 17 | private: 18 | int batchsize_; 19 | int input_w_; 20 | int input_h_; 21 | int img_idx_; 22 | std::string img_dir_; 23 | std::vector img_files_; 24 | size_t input_count_; 25 | std::string calib_table_name_; 26 | const char* input_blob_name_; 27 | bool read_cache_; 28 | void* device_input_; 29 | std::vector calib_cache_; 30 | }; 31 | -------------------------------------------------------------------------------- /yolov8_accelerate/cmake/FindTensorRT.cmake: -------------------------------------------------------------------------------- 1 | ## find tensorrt 2 | include(FindPackageHandleStandardArgs) 3 | 4 | ## 用户可以输入的TensorRT 搜索路径 5 | set(TensorRT_ROOT 6 | "" 7 | CACHE 8 | PATH 9 | "TensorRT root directory") 10 | 11 | ## 设置TensorRT 搜索路径 12 | 
set(TensorRT_SEARCH_PATH 13 | /usr/include/x86_64-linux-gnu 14 | /usr/src/tensorrt 15 | /usr/lib/x86_64-linux-gnu 16 | ${TensorRT_ROOT} 17 | ) 18 | 19 | ## 设置需要搜索的TensorRT 依赖库 20 | set(TensorRT_ALL_LIBS 21 | nvinfer 22 | nvinfer_plugin 23 | nvparsers 24 | nvonnxparser 25 | ) 26 | 27 | ## 提前设置后面需要用的变量 28 | set(TensorRT_LIBS_LIST) 29 | set(TensorRT_LIBRARIES) 30 | 31 | ## 搜索头文件的路径 32 | find_path( 33 | TensorRT_INCLUDE_DIR 34 | NAMES NvInfer.h 35 | PATHS ${TensorRT_SEARCH_PATH} 36 | ) 37 | 38 | ## 利用头文件路径下的version文件来设置TensorRT的版本信息 39 | if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") 40 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") 41 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") 42 | file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") 43 | 44 | string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") 45 | string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") 46 | string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") 47 | set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") 48 | endif() 49 | message("TensorRT version: ${TensorRT_VERSION_STRING}") 50 | 51 | ## 搜索sample code的路径 52 | find_path( 53 | TensorRT_SAMPLE_DIR 54 | NAMES trtexec/trtexec.cpp 55 | PATHS ${TensorRT_SEARCH_PATH} 56 | PATH_SUFFIXES samples 57 | ) 58 | 59 | ## 依次搜索TensorRT依赖库 60 | foreach(lib ${TensorRT_ALL_LIBS} ) 61 | find_library( 62 | TensorRT_${lib}_LIBRARY 63 | NAMES ${lib} 64 | PATHS ${TensorRT_SEARCH_PATH} 65 | ) 66 | ## 存储TensorRT的依赖库变量 67 | set(TensorRT_LIBS_VARS TensorRT_${lib}_LIBRARY ${TensorRT_LIBS_LIST}) 68 | ## 
// Needed by CUDA_CHECK itself (std::abort and std::cerr); included here so the
// macro does not depend on what the including file happens to pull in.
#include <cstdlib>
#include <iostream>

#ifndef CUDA_CHECK
/**
 * Evaluates a CUDA runtime call exactly once and aborts the process if it does
 * not return cudaSuccess, after printing the error code and source location.
 *
 * - std::abort() is used instead of assert(0) so the failure is still fatal
 *   when NDEBUG is defined (assert compiles to nothing in that case).
 * - The body is wrapped in do { } while (0) so that `CUDA_CHECK(x);` is a
 *   single statement and is safe inside an unbraced if/else.
 *
 * The caller must have cudaError_t and cudaSuccess in scope.
 */
#define CUDA_CHECK(callstr)                                                        \
    do {                                                                           \
        const cudaError_t cuda_check_status_ = (callstr);                          \
        if (cuda_check_status_ != cudaSuccess) {                                   \
            std::cerr << "CUDA error " << cuda_check_status_ << " at " << __FILE__ \
                      << ":" << __LINE__ << std::endl;                             \
            std::abort();                                                          \
        }                                                                          \
    } while (0)
#endif  // CUDA_CHECK
3e98b4f3 15 | /model.2/m.0/cv1/act/Sigmoid_output_0: 3c010204 16 | /model.2/m.0/cv1/act/Mul_output_0: 3e435994 17 | /model.2/m.0/cv2/conv/Conv_output_0: 3e3a5976 18 | /model.2/m.0/cv2/act/Sigmoid_output_0: 3c0101d8 19 | /model.2/m.0/cv2/act/Mul_output_0: 3dc4044b 20 | /model.2/m.0/Add_output_0: 3e188404 21 | /model.2/m.1/cv1/conv/Conv_output_0: 3e3af069 22 | /model.2/m.1/cv1/act/Sigmoid_output_0: 3c010204 23 | /model.2/m.1/cv1/act/Mul_output_0: 3e1d475d 24 | /model.2/m.1/cv2/conv/Conv_output_0: 3e255402 25 | /model.2/m.1/cv2/act/Sigmoid_output_0: 3c0101fd 26 | /model.2/m.1/cv2/act/Mul_output_0: 3de1ef65 27 | /model.2/m.1/Add_output_0: 3e3365ce 28 | /model.2/m.2/cv1/conv/Conv_output_0: 3db980ba 29 | /model.2/m.2/cv1/act/Sigmoid_output_0: 3c00f41a 30 | /model.2/m.2/cv1/act/Mul_output_0: 3d7a8bed 31 | /model.2/m.2/cv2/conv/Conv_output_0: 3e50f6d6 32 | /model.2/m.2/cv2/act/Sigmoid_output_0: 3c010204 33 | /model.2/m.2/cv2/act/Mul_output_0: 3e50f6d6 34 | /model.2/m.2/Add_output_0: 3e4c6390 35 | /model.2/Concat_output_0: 3ecf0b6f 36 | /model.2/cv2/conv/Conv_output_0: 3ea89178 37 | /model.2/cv2/act/Sigmoid_output_0: 3c0101fe 38 | /model.2/cv2/act/Mul_output_0: 3de4cb58 39 | /model.3/conv/Conv_output_0: 3e17207f 40 | /model.3/act/Sigmoid_output_0: 3c010204 41 | /model.3/act/Mul_output_0: 3e17207f 42 | /model.4/cv1/conv/Conv_output_0: 3e329f49 43 | /model.4/cv1/act/Sigmoid_output_0: 3c010204 44 | /model.4/cv1/act/Mul_output_0: 3e329f49 45 | /model.4/Split_output_0: 3e329f49 46 | /model.4/Split_output_1: 3dc81fb2 47 | /model.4/m.0/cv1/conv/Conv_output_0: 3dc1c5ff 48 | /model.4/m.0/cv1/act/Sigmoid_output_0: 3c0101d2 49 | /model.4/m.0/cv1/act/Mul_output_0: 3dc1c5b3 50 | /model.4/m.0/cv2/conv/Conv_output_0: 3d86ae70 51 | /model.4/m.0/cv2/act/Sigmoid_output_0: 3c007717 52 | /model.4/m.0/cv2/act/Mul_output_0: 3d2f9312 53 | /model.4/m.0/Add_output_0: 3dc3ab76 54 | /model.4/m.1/cv1/conv/Conv_output_0: 3dd2532a 55 | /model.4/m.1/cv1/act/Sigmoid_output_0: 3c0101f2 56 | 
/model.4/m.1/cv1/act/Mul_output_0: 3dd2530d 57 | /model.4/m.1/cv2/conv/Conv_output_0: 3ddd4238 58 | /model.4/m.1/cv2/act/Sigmoid_output_0: 3c0101fb 59 | /model.4/m.1/cv2/act/Mul_output_0: 3ddd4229 60 | /model.4/m.1/Add_output_0: 3df5ee04 61 | /model.4/m.2/cv1/conv/Conv_output_0: 3db468cb 62 | /model.4/m.2/cv1/act/Sigmoid_output_0: 3c01018e 63 | /model.4/m.2/cv1/act/Mul_output_0: 3db46825 64 | /model.4/m.2/cv2/conv/Conv_output_0: 3df8bf47 65 | /model.4/m.2/cv2/act/Sigmoid_output_0: 3c010202 66 | /model.4/m.2/cv2/act/Mul_output_0: 3df8bf43 67 | /model.4/m.2/Add_output_0: 3e08054d 68 | /model.4/m.3/cv1/conv/Conv_output_0: 3db5ff45 69 | /model.4/m.3/cv1/act/Sigmoid_output_0: 3c010135 70 | /model.4/m.3/cv1/act/Mul_output_0: 3dab2bb8 71 | /model.4/m.3/cv2/conv/Conv_output_0: 3dd2e8ba 72 | /model.4/m.3/cv2/act/Sigmoid_output_0: 3c0101df 73 | /model.4/m.3/cv2/act/Mul_output_0: 3dc71291 74 | /model.4/m.3/Add_output_0: 3e0e559f 75 | /model.4/m.4/cv1/conv/Conv_output_0: 3db4d9e1 76 | /model.4/m.4/cv1/act/Sigmoid_output_0: 3c010191 77 | /model.4/m.4/cv1/act/Mul_output_0: 3db4d93f 78 | /model.4/m.4/cv2/conv/Conv_output_0: 3e476e13 79 | /model.4/m.4/cv2/act/Sigmoid_output_0: 3c010204 80 | /model.4/m.4/cv2/act/Mul_output_0: 3e476e13 81 | /model.4/m.4/Add_output_0: 3e62bd00 82 | /model.4/m.5/cv1/conv/Conv_output_0: 3db8a32e 83 | /model.4/m.5/cv1/act/Sigmoid_output_0: 3c01018b 84 | /model.4/m.5/cv1/act/Mul_output_0: 3db40401 85 | /model.4/m.5/cv2/conv/Conv_output_0: 3e775251 86 | /model.4/m.5/cv2/act/Sigmoid_output_0: 3c010204 87 | /model.4/m.5/cv2/act/Mul_output_0: 3e775251 88 | /model.4/m.5/Add_output_0: 3ea37028 89 | /model.4/Concat_output_0: 3ea37028 90 | /model.4/cv2/conv/Conv_output_0: 3e202c54 91 | /model.4/cv2/act/Sigmoid_output_0: 3c010204 92 | /model.4/cv2/act/Mul_output_0: 3e202c54 93 | /model.5/conv/Conv_output_0: 3e1667c2 94 | /model.5/act/Sigmoid_output_0: 3c010204 95 | /model.5/act/Mul_output_0: 3e1667c2 96 | /model.6/cv1/conv/Conv_output_0: 3e61adbf 97 | 
/model.6/cv1/act/Sigmoid_output_0: 3c010204 98 | /model.6/cv1/act/Mul_output_0: 3e61adbf 99 | /model.6/Split_output_0: 3e61adbf 100 | /model.6/Split_output_1: 3e121770 101 | /model.6/m.0/cv1/conv/Conv_output_0: 3dcd25e6 102 | /model.6/m.0/cv1/act/Sigmoid_output_0: 3c0101ca 103 | /model.6/m.0/cv1/act/Mul_output_0: 3dbfba44 104 | /model.6/m.0/cv2/conv/Conv_output_0: 3da7b5d1 105 | /model.6/m.0/cv2/act/Sigmoid_output_0: 3c00e982 106 | /model.6/m.0/cv2/act/Mul_output_0: 3d6835e1 107 | /model.6/m.0/Add_output_0: 3e106abb 108 | /model.6/m.1/cv1/conv/Conv_output_0: 3deaec96 109 | /model.6/m.1/cv1/act/Sigmoid_output_0: 3c010200 110 | /model.6/m.1/cv1/act/Mul_output_0: 3deaec8f 111 | /model.6/m.1/cv2/conv/Conv_output_0: 3daf4d77 112 | /model.6/m.1/cv2/act/Sigmoid_output_0: 3c010165 113 | /model.6/m.1/cv2/act/Mul_output_0: 3daf4c9e 114 | /model.6/m.1/Add_output_0: 3e1313ee 115 | /model.6/m.2/cv1/conv/Conv_output_0: 3ddbee4c 116 | /model.6/m.2/cv1/act/Sigmoid_output_0: 3c0101fa 117 | /model.6/m.2/cv1/act/Mul_output_0: 3ddbee3b 118 | /model.6/m.2/cv2/conv/Conv_output_0: 3dfaf97a 119 | /model.6/m.2/cv2/act/Sigmoid_output_0: 3c010203 120 | /model.6/m.2/cv2/act/Mul_output_0: 3dfaf977 121 | /model.6/m.2/Add_output_0: 3e1d28ba 122 | /model.6/m.3/cv1/conv/Conv_output_0: 3dedd75a 123 | /model.6/m.3/cv1/act/Sigmoid_output_0: 3c010201 124 | /model.6/m.3/cv1/act/Mul_output_0: 3dedd754 125 | /model.6/m.3/cv2/conv/Conv_output_0: 3e0a5f6a 126 | /model.6/m.3/cv2/act/Sigmoid_output_0: 3c010204 127 | /model.6/m.3/cv2/act/Mul_output_0: 3e0a5f6a 128 | /model.6/m.3/Add_output_0: 3e395430 129 | /model.6/m.4/cv1/conv/Conv_output_0: 3e09a00b 130 | /model.6/m.4/cv1/act/Sigmoid_output_0: 3c010204 131 | /model.6/m.4/cv1/act/Mul_output_0: 3e09a00b 132 | /model.6/m.4/cv2/conv/Conv_output_0: 3e352111 133 | /model.6/m.4/cv2/act/Sigmoid_output_0: 3c010204 134 | /model.6/m.4/cv2/act/Mul_output_0: 3e352111 135 | /model.6/m.4/Add_output_0: 3e5f5e2f 136 | /model.6/m.5/cv1/conv/Conv_output_0: 3de25977 137 | 
/model.6/m.5/cv1/act/Sigmoid_output_0: 3c0101fd 138 | /model.6/m.5/cv1/act/Mul_output_0: 3de2596b 139 | /model.6/m.5/cv2/conv/Conv_output_0: 3e5d5e1b 140 | /model.6/m.5/cv2/act/Sigmoid_output_0: 3c010204 141 | /model.6/m.5/cv2/act/Mul_output_0: 3e5d5e1b 142 | /model.6/m.5/Add_output_0: 3e9bf981 143 | /model.6/Concat_output_0: 3e9bf981 144 | /model.6/cv2/conv/Conv_output_0: 3e1eede3 145 | /model.6/cv2/act/Sigmoid_output_0: 3c010204 146 | /model.6/cv2/act/Mul_output_0: 3e1eede3 147 | /model.7/conv/Conv_output_0: 3e3541a5 148 | /model.7/act/Sigmoid_output_0: 3c010204 149 | /model.7/act/Mul_output_0: 3e3541a5 150 | /model.8/cv1/conv/Conv_output_0: 3e160dfa 151 | /model.8/cv1/act/Sigmoid_output_0: 3c010204 152 | /model.8/cv1/act/Mul_output_0: 3e160dfa 153 | /model.8/Split_output_0: 3e160dfa 154 | /model.8/Split_output_1: 3de96575 155 | /model.8/m.0/cv1/conv/Conv_output_0: 3e37fd74 156 | /model.8/m.0/cv1/act/Sigmoid_output_0: 3c010203 157 | /model.8/m.0/cv1/act/Mul_output_0: 3e0050d5 158 | /model.8/m.0/cv2/conv/Conv_output_0: 3e0c6fea 159 | /model.8/m.0/cv2/act/Sigmoid_output_0: 3c010202 160 | /model.8/m.0/cv2/act/Mul_output_0: 3df60595 161 | /model.8/m.0/Add_output_0: 3e12bc85 162 | /model.8/m.1/cv1/conv/Conv_output_0: 3e0053da 163 | /model.8/m.1/cv1/act/Sigmoid_output_0: 3c010203 164 | /model.8/m.1/cv1/act/Mul_output_0: 3e0053d9 165 | /model.8/m.1/cv2/conv/Conv_output_0: 3e0cbc84 166 | /model.8/m.1/cv2/act/Sigmoid_output_0: 3c010204 167 | /model.8/m.1/cv2/act/Mul_output_0: 3e0cbc84 168 | /model.8/m.1/Add_output_0: 3e12a6f2 169 | /model.8/m.2/cv1/conv/Conv_output_0: 3dfaa1b9 170 | /model.8/m.2/cv1/act/Sigmoid_output_0: 3c010203 171 | /model.8/m.2/cv1/act/Mul_output_0: 3dfaa1b6 172 | /model.8/m.2/cv2/conv/Conv_output_0: 3e11e3f3 173 | /model.8/m.2/cv2/act/Sigmoid_output_0: 3c010204 174 | /model.8/m.2/cv2/act/Mul_output_0: 3e11e3f3 175 | /model.8/m.2/Add_output_0: 3e23e1af 176 | /model.8/Concat_output_0: 3e23e1af 177 | /model.8/cv2/conv/Conv_output_0: 3df6fd2a 178 | 
/model.8/cv2/act/Sigmoid_output_0: 3c0101a2 179 | /model.8/cv2/act/Mul_output_0: 3db74dde 180 | /model.9/cv1/conv/Conv_output_0: 3d8d5c1b 181 | /model.9/cv1/act/Sigmoid_output_0: 3c00fce7 182 | /model.9/cv1/act/Mul_output_0: 3d8d5681 183 | /model.9/m/MaxPool_output_0: 3d8d5681 184 | /model.9/m_1/MaxPool_output_0: 3d8d5681 185 | /model.9/m_2/MaxPool_output_0: 3d8d5681 186 | /model.9/Concat_output_0: 3d8d5681 187 | /model.9/cv2/conv/Conv_output_0: 3d90a363 188 | /model.9/cv2/act/Sigmoid_output_0: 3c00f866 189 | /model.9/cv2/act/Mul_output_0: 3d834ad6 190 | /model.10/Resize_output_0: 3d834ad6 191 | /model.11/Concat_output_0: 3e1eede3 192 | /model.12/cv1/conv/Conv_output_0: 3e1eb782 193 | /model.12/cv1/act/Sigmoid_output_0: 3c010204 194 | /model.12/cv1/act/Mul_output_0: 3e1eb782 195 | /model.12/Split_output_0: 3e18ae89 196 | /model.12/Split_output_1: 3e1eb782 197 | /model.12/m.0/cv1/conv/Conv_output_0: 3e220147 198 | /model.12/m.0/cv1/act/Sigmoid_output_0: 3c010204 199 | /model.12/m.0/cv1/act/Mul_output_0: 3e220147 200 | /model.12/m.0/cv2/conv/Conv_output_0: 3e0c0b9d 201 | /model.12/m.0/cv2/act/Sigmoid_output_0: 3c010204 202 | /model.12/m.0/cv2/act/Mul_output_0: 3e0c0b9d 203 | /model.12/m.1/cv1/conv/Conv_output_0: 3e073db5 204 | /model.12/m.1/cv1/act/Sigmoid_output_0: 3c010204 205 | /model.12/m.1/cv1/act/Mul_output_0: 3e073db5 206 | /model.12/m.1/cv2/conv/Conv_output_0: 3dd6d56c 207 | /model.12/m.1/cv2/act/Sigmoid_output_0: 3c0101f6 208 | /model.12/m.1/cv2/act/Mul_output_0: 3dd6d556 209 | /model.12/m.2/cv1/conv/Conv_output_0: 3e39b7a4 210 | /model.12/m.2/cv1/act/Sigmoid_output_0: 3c010204 211 | /model.12/m.2/cv1/act/Mul_output_0: 3e39b7a4 212 | /model.12/m.2/cv2/conv/Conv_output_0: 3e254c92 213 | /model.12/m.2/cv2/act/Sigmoid_output_0: 3c010204 214 | /model.12/m.2/cv2/act/Mul_output_0: 3e254c92 215 | /model.12/Concat_output_0: 3e254c92 216 | /model.12/cv2/conv/Conv_output_0: 3df17628 217 | /model.12/cv2/act/Sigmoid_output_0: 3c0101fb 218 | 
/model.12/cv2/act/Mul_output_0: 3ddde732 219 | /model.13/Resize_output_0: 3ddde732 220 | /model.14/Concat_output_0: 3e202c54 221 | /model.15/cv1/conv/Conv_output_0: 3dcac45d 222 | /model.15/cv1/act/Sigmoid_output_0: 3c0101e7 223 | /model.15/cv1/act/Mul_output_0: 3dcac42e 224 | /model.15/Split_output_0: 3dcac42e 225 | /model.15/Split_output_1: 3d8ad2f9 226 | /model.15/m.0/cv1/conv/Conv_output_0: 3da4f023 227 | /model.15/m.0/cv1/act/Sigmoid_output_0: 3c010018 228 | /model.15/m.0/cv1/act/Mul_output_0: 3d9d1e96 229 | /model.15/m.0/cv2/conv/Conv_output_0: 3db4d1a4 230 | /model.15/m.0/cv2/act/Sigmoid_output_0: 3c010183 231 | /model.15/m.0/cv2/act/Mul_output_0: 3db2de16 232 | /model.15/m.1/cv1/conv/Conv_output_0: 3df27856 233 | /model.15/m.1/cv1/act/Sigmoid_output_0: 3c010201 234 | /model.15/m.1/cv1/act/Mul_output_0: 3df27850 235 | /model.15/m.1/cv2/conv/Conv_output_0: 3e00c82f 236 | /model.15/m.1/cv2/act/Sigmoid_output_0: 3c0101f5 237 | /model.15/m.1/cv2/act/Mul_output_0: 3dd55d2a 238 | /model.15/m.2/cv1/conv/Conv_output_0: 3e04b1e0 239 | /model.15/m.2/cv1/act/Sigmoid_output_0: 3c0101e3 240 | /model.15/m.2/cv1/act/Mul_output_0: 3dc89533 241 | /model.15/m.2/cv2/conv/Conv_output_0: 3e84105f 242 | /model.15/m.2/cv2/act/Sigmoid_output_0: 3c010204 243 | /model.15/m.2/cv2/act/Mul_output_0: 3e263fe5 244 | /model.15/Concat_output_0: 3e263fe5 245 | /model.15/cv2/conv/Conv_output_0: 3f26c491 246 | /model.15/cv2/act/Sigmoid_output_0: 3c010203 247 | /model.15/cv2/act/Mul_output_0: 3dfa6fbc 248 | /model.16/conv/Conv_output_0: 3e2c4214 249 | /model.16/act/Sigmoid_output_0: 3c0101ff 250 | /model.16/act/Mul_output_0: 3de65bc9 251 | /model.17/Concat_output_0: 3de65bc9 252 | /model.18/cv1/conv/Conv_output_0: 3e28ed74 253 | /model.18/cv1/act/Sigmoid_output_0: 3c010204 254 | /model.18/cv1/act/Mul_output_0: 3e28ed74 255 | /model.18/Split_output_0: 3e28ed74 256 | /model.18/Split_output_1: 3e256f1c 257 | /model.18/m.0/cv1/conv/Conv_output_0: 3e58b179 258 | 
/model.18/m.0/cv1/act/Sigmoid_output_0: 3c010204 259 | /model.18/m.0/cv1/act/Mul_output_0: 3e58b179 260 | /model.18/m.0/cv2/conv/Conv_output_0: 3e26f836 261 | /model.18/m.0/cv2/act/Sigmoid_output_0: 3c010204 262 | /model.18/m.0/cv2/act/Mul_output_0: 3e26f836 263 | /model.18/m.1/cv1/conv/Conv_output_0: 3e7bbf21 264 | /model.18/m.1/cv1/act/Sigmoid_output_0: 3c010204 265 | /model.18/m.1/cv1/act/Mul_output_0: 3e7bbf21 266 | /model.18/m.1/cv2/conv/Conv_output_0: 3e26e2d7 267 | /model.18/m.1/cv2/act/Sigmoid_output_0: 3c010204 268 | /model.18/m.1/cv2/act/Mul_output_0: 3e26e2d7 269 | /model.18/m.2/cv1/conv/Conv_output_0: 3e289ae1 270 | /model.18/m.2/cv1/act/Sigmoid_output_0: 3c010204 271 | /model.18/m.2/cv1/act/Mul_output_0: 3e1cf86a 272 | /model.18/m.2/cv2/conv/Conv_output_0: 3e8cb191 273 | /model.18/m.2/cv2/act/Sigmoid_output_0: 3c010204 274 | /model.18/m.2/cv2/act/Mul_output_0: 3e8ab694 275 | /model.18/Concat_output_0: 3e8ab694 276 | /model.18/cv2/conv/Conv_output_0: 3f02d55a 277 | /model.18/cv2/act/Sigmoid_output_0: 3c010204 278 | /model.18/cv2/act/Mul_output_0: 3e3cb288 279 | /model.19/conv/Conv_output_0: 3e13308d 280 | /model.19/act/Sigmoid_output_0: 3c010204 281 | /model.19/act/Mul_output_0: 3e13308d 282 | /model.20/Concat_output_0: 3e13308d 283 | /model.21/cv1/conv/Conv_output_0: 3e0cf19e 284 | /model.21/cv1/act/Sigmoid_output_0: 3c010204 285 | /model.21/cv1/act/Mul_output_0: 3e0cf19e 286 | /model.21/Split_output_0: 3de9706a 287 | /model.21/Split_output_1: 3e0cf19e 288 | /model.21/m.0/cv1/conv/Conv_output_0: 3e06ad63 289 | /model.21/m.0/cv1/act/Sigmoid_output_0: 3c010204 290 | /model.21/m.0/cv1/act/Mul_output_0: 3e06ad63 291 | /model.21/m.0/cv2/conv/Conv_output_0: 3e0f36e0 292 | /model.21/m.0/cv2/act/Sigmoid_output_0: 3c010204 293 | /model.21/m.0/cv2/act/Mul_output_0: 3e0f36e0 294 | /model.21/m.1/cv1/conv/Conv_output_0: 3df940c3 295 | /model.21/m.1/cv1/act/Sigmoid_output_0: 3c010202 296 | /model.21/m.1/cv1/act/Mul_output_0: 3df940be 297 | 
/model.21/m.1/cv2/conv/Conv_output_0: 3e14c1aa 298 | /model.21/m.1/cv2/act/Sigmoid_output_0: 3c010204 299 | /model.21/m.1/cv2/act/Mul_output_0: 3e14c1aa 300 | /model.21/m.2/cv1/conv/Conv_output_0: 3e1624c6 301 | /model.21/m.2/cv1/act/Sigmoid_output_0: 3c010204 302 | /model.21/m.2/cv1/act/Mul_output_0: 3e1624c6 303 | /model.21/m.2/cv2/conv/Conv_output_0: 3e28c2a5 304 | /model.21/m.2/cv2/act/Sigmoid_output_0: 3c010204 305 | /model.21/m.2/cv2/act/Mul_output_0: 3e208a4a 306 | /model.21/Concat_output_0: 3e208a4a 307 | /model.21/cv2/conv/Conv_output_0: 3e6e3efc 308 | /model.21/cv2/act/Sigmoid_output_0: 3c010204 309 | /model.21/cv2/act/Mul_output_0: 3e04408b 310 | /model.22/cv2.0/cv2.0.0/conv/Conv_output_0: 3e396877 311 | /model.22/cv2.0/cv2.0.0/act/Sigmoid_output_0: 3c010204 312 | /model.22/cv2.0/cv2.0.0/act/Mul_output_0: 3e382f27 313 | /model.22/cv2.0/cv2.0.1/conv/Conv_output_0: 3f033a5a 314 | /model.22/cv2.0/cv2.0.1/act/Sigmoid_output_0: 3c010204 315 | /model.22/cv2.0/cv2.0.1/act/Mul_output_0: 3f033a5a 316 | /model.22/cv2.0/cv2.0.2/Conv_output_0: 3e127465 317 | /model.22/cv3.0/cv3.0.0/conv/Conv_output_0: 3ec6c133 318 | /model.22/cv3.0/cv3.0.0/act/Sigmoid_output_0: 3c010204 319 | /model.22/cv3.0/cv3.0.0/act/Mul_output_0: 3e2ffa4e 320 | /model.22/cv3.0/cv3.0.1/conv/Conv_output_0: 3f25156a 321 | /model.22/cv3.0/cv3.0.1/act/Sigmoid_output_0: 3c010204 322 | /model.22/cv3.0/cv3.0.1/act/Mul_output_0: 3f25156a 323 | /model.22/cv3.0/cv3.0.2/Conv_output_0: 3eb527da 324 | /model.22/Concat_output_0: 3eb527da 325 | /model.22/cv2.1/cv2.1.0/conv/Conv_output_0: 3e4f93f0 326 | /model.22/cv2.1/cv2.1.0/act/Sigmoid_output_0: 3c010204 327 | /model.22/cv2.1/cv2.1.0/act/Mul_output_0: 3e4f93f0 328 | /model.22/cv2.1/cv2.1.1/conv/Conv_output_0: 3f0348c6 329 | /model.22/cv2.1/cv2.1.1/act/Sigmoid_output_0: 3c010204 330 | /model.22/cv2.1/cv2.1.1/act/Mul_output_0: 3f0348c6 331 | /model.22/cv2.1/cv2.1.2/Conv_output_0: 3dfa6313 332 | /model.22/cv3.1/cv3.1.0/conv/Conv_output_0: 3ea14069 333 | 
/model.22/cv3.1/cv3.1.0/act/Sigmoid_output_0: 3c010204 334 | /model.22/cv3.1/cv3.1.0/act/Mul_output_0: 3e33076e 335 | /model.22/cv3.1/cv3.1.1/conv/Conv_output_0: 3ea16d46 336 | /model.22/cv3.1/cv3.1.1/act/Sigmoid_output_0: 3c010204 337 | /model.22/cv3.1/cv3.1.1/act/Mul_output_0: 3ea16d46 338 | /model.22/cv3.1/cv3.1.2/Conv_output_0: 3eba61db 339 | /model.22/Concat_1_output_0: 3eba61db 340 | /model.22/cv2.2/cv2.2.0/conv/Conv_output_0: 3e102f6a 341 | /model.22/cv2.2/cv2.2.0/act/Sigmoid_output_0: 3c010204 342 | /model.22/cv2.2/cv2.2.0/act/Mul_output_0: 3e102f6a 343 | /model.22/cv2.2/cv2.2.1/conv/Conv_output_0: 3efa4727 344 | /model.22/cv2.2/cv2.2.1/act/Sigmoid_output_0: 3c010204 345 | /model.22/cv2.2/cv2.2.1/act/Mul_output_0: 3efa4727 346 | /model.22/cv2.2/cv2.2.2/Conv_output_0: 3dd17524 347 | /model.22/cv3.2/cv3.2.0/conv/Conv_output_0: 3e30a231 348 | /model.22/cv3.2/cv3.2.0/act/Sigmoid_output_0: 3c010204 349 | /model.22/cv3.2/cv3.2.0/act/Mul_output_0: 3e2d8fd2 350 | /model.22/cv3.2/cv3.2.1/conv/Conv_output_0: 3ecd5b68 351 | /model.22/cv3.2/cv3.2.1/act/Sigmoid_output_0: 3c010204 352 | /model.22/cv3.2/cv3.2.1/act/Mul_output_0: 3ecd5b68 353 | /model.22/cv3.2/cv3.2.2/Conv_output_0: 3e8756e2 354 | /model.22/Concat_2_output_0: 3e8756e2 355 | /model.22/Reshape_output_0: 3eb527da 356 | /model.22/Reshape_1_output_0: 3eba61db 357 | /model.22/Reshape_2_output_0: 3e8756e2 358 | /model.22/Concat_3_output_0: 3eba61db 359 | /model.22/Split_output_0: 3e127465 360 | /model.22/Split_output_1: 3eba61db 361 | /model.22/dfl/Reshape_output_0: 3e127465 362 | /model.22/dfl/Transpose_output_0: 3e127465 363 | (Unnamed Layer* 360) [Softmax]_output: 3bfdb6b6 364 | /model.22/dfl/conv/Conv_output_0: 3dec6e61 365 | /model.22/dfl/Reshape_1_output_0: 3dec6e61 366 | /model.22/Slice_output_0: 3deafad7 367 | /model.22/Slice_1_output_0: 3dec6e61 368 | (Unnamed Layer* 420) [Constant]_output: 3f204081 369 | /model.22/Sub_output_0: 3f202394 370 | (Unnamed Layer* 422) [Constant]_output: 3f204081 371 | 
/model.22/Add_1_output_0: 3f26ee92 372 | /model.22/Add_2_output_0: 3fa0ce6d 373 | (Unnamed Layer* 426) [Shuffle]_output: 3c810204 374 | /model.22/Div_1_output_0: 3f20ce6d 375 | /model.22/Sub_1_output_0: 3e6a6eb5 376 | /model.22/Concat_4_output_0: 3f20ce6d 377 | (Unnamed Layer* 430) [Constant]_output: 3e810204 378 | (Unnamed Layer* 431) [Shuffle]_output: 3e810204 379 | /model.22/Mul_2_output_0: 40a9237e 380 | /model.22/Sigmoid_output_0: 3c00d409 381 | output0: 40a9237e 382 | -------------------------------------------------------------------------------- /yolov8_accelerate/runtime.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "yolov8_utils.h" 20 | 21 | 22 | // 以下示例捕获所有警告消息,但忽略信息性消息 23 | class Logger : public nvinfer1::ILogger 24 | { 25 | void log(Severity severity, const char* msg) noexcept override 26 | { 27 | // 抑制信息级别的消息 28 | if (severity <= Severity::kWARNING) 29 | std::cout << msg << std::endl; 30 | } 31 | }; 32 | 33 | // 加载模型文件 34 | std::vector load_engine_file(const std::string &file_name) 35 | { 36 | std::vector engine_data; 37 | std::ifstream engine_file(file_name, std::ios::binary); 38 | assert(engine_file.is_open() && "Unable to load engine file."); 39 | engine_file.seekg(0, engine_file.end); 40 | int length = engine_file.tellg(); 41 | engine_data.resize(length); 42 | engine_file.seekg(0, engine_file.beg); 43 | engine_file.read(reinterpret_cast(engine_data.data()), length); 44 | return engine_data; 45 | } 46 | 47 | int main(int argc, char **argv) 48 | { 49 | // 实例化ILogger 50 | Logger logger; 51 | 52 | std::unique_ptr runtime = std::unique_ptr(nvinfer1::createInferRuntime(logger)); 53 | if (runtime == nullptr){return false;} 54 | 55 | std::string file_path = "yolov8x.engine"; 56 | 
auto plan = load_engine_file(file_path); 57 | 58 | auto engine = std::unique_ptr(runtime->deserializeCudaEngine(plan.data(), plan.size())); 59 | if (engine == nullptr){return false;} 60 | 61 | auto context = std::unique_ptr(engine->createExecutionContext()); 62 | if (context == nullptr){return false;} 63 | 64 | auto idims = engine->getTensorShape("images"); 65 | auto odims = engine->getTensorShape("output0"); 66 | nvinfer1::Dims4 inputDims = { idims.d[0], idims.d[1], idims.d[2], idims.d[3] }; 67 | nvinfer1::Dims3 outputDims = { idims.d[0], idims.d[1], idims.d[2] }; 68 | context->setInputShape("images", inputDims); 69 | 70 | void* buffers[2]; 71 | const int inputIndex = 0; 72 | const int outputIndex = 1; 73 | 74 | cudaMalloc(&buffers[inputIndex], idims.d[0] * idims.d[1] * idims.d[2] * idims.d[3] * sizeof(float)); 75 | cudaMalloc(&buffers[outputIndex], odims.d[0] * odims.d[1] * odims.d[2] * sizeof(float)); 76 | 77 | // 设定数据地址 78 | context->setTensorAddress("images", buffers[inputIndex]); 79 | context->setTensorAddress("output0", buffers[outputIndex]); 80 | 81 | // 创建cuda流 82 | cudaStream_t stream; 83 | cudaStreamCreate(&stream); 84 | 85 | float cal_time_pre, cal_time_inf, cal_time_pos = 0.0; 86 | 87 | // 读取文件执行推理 88 | for(int i = 1; i < 63; i += 1) 89 | { 90 | // 读取数据+预处理 91 | clock_t start,end; 92 | start = clock(); 93 | // 读取图片 94 | cv::Mat img; 95 | std::string file_name = "img/img (" + std::to_string(i) + ").jpg"; 96 | img = cv::imread(file_name); 97 | if (img.empty()) //检测image有无数据,无数据 image.empty()返回 真 98 | { 99 | std::cout << "Could not open or find the image" << std::endl; 100 | return -1; 101 | } 102 | 103 | cv::Mat LetterBoxImg; 104 | cv::Vec4d params; 105 | LetterBox(img, LetterBoxImg, params, cv::Size(640, 640)); 106 | 107 | cv::Mat blob; 108 | cv::dnn::blobFromImage(img, blob, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true, false, CV_32F); 109 | 110 | // 将图像拷贝到GPU 111 | cudaMemcpyAsync(buffers[inputIndex], blob.data,3 * 640 * 640 * 
sizeof(float), cudaMemcpyHostToDevice, stream); 112 | 113 | end = clock(); 114 | cal_time_pre += ((double)(end-start)/CLOCKS_PER_SEC) * 1000; 115 | //推理 116 | start = clock(); 117 | 118 | //执行推理 119 | if(context->enqueueV3(stream)){ 120 | // std::cout << "enqueued successfully!" << std::endl; 121 | } 122 | cudaStreamSynchronize(stream); 123 | 124 | end = clock(); 125 | cal_time_inf += ((double)(end-start)/CLOCKS_PER_SEC) * 1000; 126 | //后处理 127 | start = clock(); 128 | 129 | float rst[1][84][8400]; 130 | cudaMemcpyAsync(&rst, buffers[outputIndex], 1 * 84 * 8400 * sizeof(float), cudaMemcpyDeviceToHost, stream); 131 | 132 | postprocess(rst, img, params); 133 | 134 | end = clock(); 135 | cal_time_pos += ((double)(end-start)/CLOCKS_PER_SEC) * 1000; 136 | } 137 | 138 | std::cout << "yolov8x : preprocess:" << cal_time_pre/62 << "ms; inference: " << cal_time_inf/62 << "ms; postprocess:" << cal_time_pos/62 << "ms; fps:" << 1000/(cal_time_pre+cal_time_inf+cal_time_pos)*62 << std::endl; 139 | 140 | cudaStreamDestroy(stream); 141 | cudaFree(buffers[inputIndex]); 142 | cudaFree(buffers[outputIndex]); 143 | } -------------------------------------------------------------------------------- /yolov8_accelerate/yolov8_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "yolov8_utils.h" 2 | using namespace cv; 3 | using namespace std; 4 | 5 | void LetterBox(const cv::Mat& image, cv::Mat& outImage, cv::Vec4d& params, const cv::Size& newShape, 6 | bool autoShape, bool scaleFill, bool scaleUp, int stride, const cv::Scalar& color) 7 | { 8 | if (false) { 9 | int maxLen = MAX(image.rows, image.cols); 10 | outImage = Mat::zeros(Size(maxLen, maxLen), CV_8UC3); 11 | image.copyTo(outImage(Rect(0, 0, image.cols, image.rows))); 12 | params[0] = 1; 13 | params[1] = 1; 14 | params[3] = 0; 15 | params[2] = 0; 16 | } 17 | 18 | cv::Size shape = image.size(); 19 | float r = std::min((float)newShape.height / (float)shape.height, 20 | 
(float)newShape.width / (float)shape.width); 21 | if (!scaleUp) 22 | r = std::min(r, 1.0f); 23 | 24 | float ratio[2]{ r, r }; 25 | int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) }; 26 | 27 | auto dw = (float)(newShape.width - new_un_pad[0]); 28 | auto dh = (float)(newShape.height - new_un_pad[1]); 29 | 30 | if (autoShape) 31 | { 32 | dw = (float)((int)dw % stride); 33 | dh = (float)((int)dh % stride); 34 | } 35 | else if (scaleFill) 36 | { 37 | dw = 0.0f; 38 | dh = 0.0f; 39 | new_un_pad[0] = newShape.width; 40 | new_un_pad[1] = newShape.height; 41 | ratio[0] = (float)newShape.width / (float)shape.width; 42 | ratio[1] = (float)newShape.height / (float)shape.height; 43 | } 44 | 45 | dw /= 2.0f; 46 | dh /= 2.0f; 47 | 48 | if (shape.width != new_un_pad[0] && shape.height != new_un_pad[1]) 49 | { 50 | cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1])); 51 | } 52 | else { 53 | outImage = image.clone(); 54 | } 55 | 56 | int top = int(std::round(dh - 0.1f)); 57 | int bottom = int(std::round(dh + 0.1f)); 58 | int left = int(std::round(dw - 0.1f)); 59 | int right = int(std::round(dw + 0.1f)); 60 | params[0] = ratio[0]; 61 | params[1] = ratio[1]; 62 | params[2] = left; 63 | params[3] = top; 64 | cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); 65 | } 66 | 67 | void postprocess(float (&rst)[1][84][8400], cv::Mat &img, cv::Vec4d params) 68 | { 69 | std::vector boxes; 70 | std::vector scores; 71 | std::vector det_rst; 72 | static const float score_threshold = 0.8; 73 | static const float nms_threshold = 0.45; 74 | std::vector indices; 75 | 76 | clock_t start2,end2; 77 | start2 = clock(); 78 | 79 | for(int Anchors=0 ;Anchors < 8400; Anchors++) 80 | { 81 | float max_score = 0.0; 82 | int max_score_det = 99; 83 | float pdata[4]; 84 | for(int prob = 4; prob < 84; prob++) 85 | { 86 | if(rst[0][prob][Anchors] > max_score){ 87 | max_score = rst[0][prob][Anchors]; 88 | 
max_score_det = prob - 4; 89 | pdata[0] = rst[0][0][Anchors]; 90 | pdata[1] = rst[0][1][Anchors]; 91 | pdata[2] = rst[0][2][Anchors]; 92 | pdata[3] = rst[0][3][Anchors]; 93 | } 94 | } 95 | if(max_score >= score_threshold) 96 | { 97 | float x = (pdata[0] - params[2]) / params[0]; 98 | float y = (pdata[1] - params[3]) / params[1]; 99 | float w = pdata[2] / params[0]; 100 | float h = pdata[3] / params[1]; 101 | int left = MAX(int(x - 0.5 * w + 0.5), 0); 102 | int top = MAX(int(y - 0.5 * h + 0.5), 0); 103 | boxes.push_back(Rect(left, top, int(w + 0.5), int(h + 0.5))); 104 | scores.emplace_back(max_score); 105 | det_rst.emplace_back(max_score_det); 106 | } 107 | } 108 | 109 | end2 = clock(); 110 | // std::cout << ((double)(end2-start2)/CLOCKS_PER_SEC) * 1000 << std::endl; 111 | 112 | cv::dnn::NMSBoxes(boxes, scores, score_threshold, nms_threshold, indices); 113 | 114 | for (int i = 0; i < indices.size(); i++) { 115 | // std::cout << boxes[indices[i]] << std::endl; 116 | cv::rectangle(img, boxes[indices[i]], Scalar(255, 0, 0), 2, LINE_8,0); 117 | } 118 | 119 | // cv::imshow("rst",img); 120 | // cv::waitKey(0); 121 | } -------------------------------------------------------------------------------- /yolov8_accelerate/yolov8_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | void LetterBox(const cv::Mat& image, cv::Mat& outImage, 7 | cv::Vec4d& params, //[ratio_x,ratio_y,dw,dh] 8 | const cv::Size& newShape = cv::Size(640, 640), 9 | bool autoShape = false, 10 | bool scaleFill = false, 11 | bool scaleUp = true, 12 | int stride = 32, 13 | const cv::Scalar& color = cv::Scalar(0, 0, 0) 14 | ); 15 | 16 | void postprocess(float (&rst)[1][84][8400], cv::Mat &img, cv::Vec4d params); --------------------------------------------------------------------------------