├── CMakeLists.txt ├── README.md ├── example ├── CMakeLists.txt ├── ddrnet │ ├── CMakeLists.txt │ ├── ReadMe.md │ └── main.cpp ├── hrnet │ ├── CMakeLists.txt │ └── main.cpp └── yolov5 │ ├── CMakeLists.txt │ └── main.cpp ├── getWeights.py ├── model ├── ddrnet.json ├── ddrnet │ └── ddrnet.json ├── deeplabV3_res50.json ├── hrnet │ ├── hrnet_w18.json │ ├── hrnet_w32.json │ └── hrnet_w48.json ├── resnet50.json └── yolo │ └── yolov5s.json └── src ├── CMakeLists.txt1 ├── calibrator.cpp ├── calibrator.h ├── json-forwards.h ├── json.h ├── jsoncpp.cpp ├── plugin ├── UpsampleKernel.cu ├── UpsamplePlugin.cpp ├── UpsamplePlugin.h ├── UpsmapleKernel.h ├── hardswish.cu ├── hardswish.h ├── yololayer.cu └── yololayer.h ├── trt.cpp ├── trt.h ├── utils.cpp └── utils.h /CMakeLists.txt: --------------------------------------------------------------------------------
# Top-level build script for the tensorrtF examples.
#
# Sets up the C++/CUDA toolchain plus the OpenCV and TensorRT search paths at
# directory scope (the example sub-projects inherit them), then descends into
# example/.
#
# User-tunable cache entries (override with -D<name>=<value>):
#   OpenCV_DIR          directory containing OpenCVConfig.cmake
#   TENSORRT_DIR        TensorRT root that contains include/ and lib/
#   TENSORRTF_CUDA_ARCH GPU compute capability to generate code for

# enable_language(CUDA) below requires CMake 3.8 or newer.
cmake_minimum_required(VERSION 3.8)

# Project name.
project(tensorrtF)

# Compile host code as C++11. CMake emits the matching -std flag itself, so it
# is not repeated in CMAKE_CXX_FLAGS.
set(CMAKE_CXX_STANDARD 11)

# GCC/Clang-only flags; MSVC does not understand them.
#   -Wall   show all common warnings
#   -Ofast  every -O3 optimisation plus some that relax strict standard
#           conformance
# NOTE(review): -D_MWAITXINTRIN_H_INCLUDED is presumably a workaround for a
# GCC/CUDA header clash - confirm it is still needed.
if(NOT MSVC)
  string(APPEND CMAKE_CXX_FLAGS " -Wall -Ofast -D_MWAITXINTRIN_H_INCLUDED")
endif()

# CUDA runtime selection; declared before find_package(CUDA) is called.
# option() takes a help string as its second argument and the default as its
# third.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)

# ---------------------------------------------------------------------------
# OpenCV
# ---------------------------------------------------------------------------
# On Windows fall back to the author's install location unless the caller
# already supplied OpenCV_DIR.
if(WIN32 AND NOT OpenCV_DIR)
  set(OpenCV_DIR "D:\\opencv\\opencv346\\build")
endif()

# REQUIRED stops the configure step immediately when the package is missing
# instead of failing later at link time with an empty OpenCV_LIBS.
if(UNIX)
  find_package(OpenCV 3.4.3 REQUIRED)
else()
  find_package(OpenCV REQUIRED)
endif()

message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
message(STATUS " libraries: ${OpenCV_LIBS}")
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")

# Add the OpenCV headers to the include search path.
include_directories(${OpenCV_INCLUDE_DIRS})

# ---------------------------------------------------------------------------
# CUDA
# ---------------------------------------------------------------------------
# Locate the CUDA toolkit and enable CUDA as a first-class language so that
# .cu files listed in add_executable() are compiled.
find_package(CUDA REQUIRED)
enable_language(CUDA)

message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")

# Add the CUDA headers to the include search path.
include_directories(${CUDA_INCLUDE_DIRS})

# Target GPU architecture (the script used 75 before it was changed to 86).
set(TENSORRTF_CUDA_ARCH "86" CACHE STRING
    "GPU compute capability to generate code for, e.g. 75 or 86")
set(tensorrtf_gencode
    "arch=compute_${TENSORRTF_CUDA_ARCH},code=sm_${TENSORRTF_CUDA_ARCH}")

# Flags for CMake's native CUDA language (what add_executable() uses for .cu).
string(APPEND CMAKE_CUDA_FLAGS " -gencode ${tensorrtf_gencode}")
# -G (device-side debug info) disables device optimisations, so it is limited
# to Debug builds.
string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -G")

# Equivalent flags for the legacy FindCUDA macros (cuda_add_executable & co.).
list(APPEND CUDA_NVCC_FLAGS -std=c++11 -gencode ${tensorrtf_gencode})
list(APPEND CUDA_NVCC_FLAGS_DEBUG -g -G)

# ---------------------------------------------------------------------------
# TensorRT
# ---------------------------------------------------------------------------
# Root of the TensorRT distribution; override with -DTENSORRT_DIR=<path>.
set(TENSORRT_DIR "/home/zxl/TensorRT-7.2.3.4/targets/x86_64-linux-gnu"
    CACHE PATH "TensorRT root directory containing include/ and lib/")

# Add the TensorRT headers and libraries to the search paths.
include_directories("${TENSORRT_DIR}/include")
link_directories("${TENSORRT_DIR}/lib")

# Build the example sub-projects.
add_subdirectory(example)
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 项目介绍 2 | 3 | * 零代码:只需配置文件(json文件)和权重文件即可生成engin文件,实现trt部署。 4 | * 自动化生成配置文件:由pt模型文件可以自动化生成json文件。 5 | * 可视化网络:便于查验和原始网络(比如pytorch)的区别。 6 | * debug教程:方便对比trt输出和pytorch模型输出的区别,从而方便定位部署上的问题。 7 | 8 | # 项目结构 9 | 10 | ``` 11 | tensorrtCV 12 | | 13 | |- src 14 | | | 15 | | |- plugin 16 | | | |-xxx.h xxx.cu 17 | | |-xxx.cpp 18 | | |-xxx.h 19 | |- example 20 | | | 21 | | |-ddrnet 22 | | | |-main.cpp 23 | | 24 | | |-yolov5 25 | | | |-main.cpp 26 | | 27 | |- model 28 | | |-xxx.json 29 | ``` 30 | 31 | * `src` 
为tensort项目的主代码目录。其中包含`plugin`目录存放啊各种plugin 32 | * `example`目录中为不同网络的demo代码 33 | * `model`目录中为各种网络的json文件。 34 | 35 | ## 编译运行 36 | 37 | ### win/linux 38 | 39 | 统一使用cmake管理,**需要在CMakeLists手动修改 OpenCV_DIR TENSORRT_DIR ** 40 | 41 | 编译命令: 42 | 43 | ```makefile 44 | mkdir build 45 | cd build 46 | cmake .. 47 | // win下指定vs编译 48 | // cmake .. -G "Visual Studio 15 2017 Win64" 49 | make 50 | // win下vs打开tensorrtF.sln编译运行 51 | ``` 52 | 53 | ### 自动化生成json文件 54 | 55 | * [pytorch-classification](https://github.com/AlfengYuan/pytorch-classification) 56 | 57 | ## Comming Soon 58 | 59 | - [ ] 小白系列教程 60 | - [ ] json网络可视化 61 | 62 | **欢迎各位同学PR模型配置(json文件)和新功能。** 63 | **另外,请关注我的微信公众号(CV加速器),定期有直播讲解整个工程和集中回答问题。** 64 | 65 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: --------------------------------------------------------------------------------
# Aggregates the demo sub-projects, one directory per network.

# Same floor as the sub-projects, which compile .cu sources through CMake's
# native CUDA language support (available from 3.8).
cmake_minimum_required(VERSION 3.8)

add_subdirectory(ddrnet)
add_subdirectory(hrnet)
add_subdirectory(yolov5)
-------------------------------------------------------------------------------- /example/ddrnet/CMakeLists.txt: --------------------------------------------------------------------------------
# Demo executable "ddrnet": main.cpp plus the shared TensorRT wrapper sources.
#
# OpenCV_LIBS and the TensorRT/CUDA include and link directories are not set
# here; this script expects the enclosing build to have provided them.

# The target lists .cu sources, which needs CMake's CUDA language (3.8+).
cmake_minimum_required(VERSION 3.8)

# Project name.
project(ddrnet)

# Print the source roots in effect (top of the whole build / this project).
message(STATUS "========= ${CMAKE_SOURCE_DIR}")
message(STATUS "========= ${PROJECT_SOURCE_DIR}")

# The shared sources live in <repo>/src, two levels above this directory.
# Resolving them relative to this file keeps the script working when the
# repository is not the top-level CMake project.
set(TRT_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../src")

# Collect the shared headers, sources and CUDA plugins into TRT_FILES.
file(GLOB TRT_FILES
  "${TRT_SRC_DIR}/*.h"
  "${TRT_SRC_DIR}/*.cpp"
  "${TRT_SRC_DIR}/plugin/*.cu"
  "${TRT_SRC_DIR}/plugin/*.h"
  "${TRT_SRC_DIR}/plugin/*.cpp")

# Build the ddrnet executable.
add_executable(ddrnet main.cpp ${TRT_FILES})

# Header search path for the shared sources, scoped to this target only.
target_include_directories(ddrnet PRIVATE "${TRT_SRC_DIR}")

# Link against the TensorRT ONNX parser, the TensorRT runtime, the CUDA
# runtime and OpenCV.
target_link_libraries(ddrnet PRIVATE
  nvonnxparser
  nvinfer
  cudart
  ${OpenCV_LIBS})
-------------------------------------------------------------------------------- /example/ddrnet/ReadMe.md: -------------------------------------------------------------------------------- 1 | * 使用的模型为 ddrnet23-slim 2 | * ddrnet仓库地址 https://github.com/chenjun2hao/DDRNet.pytorch 3 | 4 | -------------------------------------------------------------------------------- /example/ddrnet/main.cpp: -------------------------------------------------------------------------------- 1 | #include "trt.h" 2 | #include "utils.h" 3 | #include "opencv2/opencv.hpp" 4 | 5 | cv::Mat createLTU(int len) { 6 | cv::Mat lookUpTable(1, 256, CV_8U); 7 | uchar* p = lookUpTable.data; 8 | for (int j = 0; j < 256; ++j) { 9 | p[j] = (j * (256 / len) > 255) ? uchar(255) : (uchar)(j * (256 / len)); 10 | } 11 | return lookUpTable; 12 | } 13 | 14 | void test() 15 | { 16 | string jsonPath = "D:/hrnet_ocr/test.json"; 17 | trt *m_trt = new trt(jsonPath); 18 | m_trt->createENG(); 19 | int batchsize = 1; 20 | m_trt->inference_init(batchsize, m_trt->getOutDim()); 21 | float *a = new float[batchsize*100]; 22 | for(int i = 0; i < batchsize*100; i++) 23 | a[i] = i*1.0 +1; 24 | float *out = new float[batchsize*56]; 25 | m_trt->doInference(a,batchsize,out); 26 | for(int i = 0;i<56;i++) 27 | { 28 | cout<createENG(); 37 | } 38 | 39 | void HRnet(std::string jsonPath) 40 | { 41 | //string jsonPath = "/mnt/f/LearningCode/ddrnet/ddrnet-slim2-wsl_infer.json"; 42 | trt *m_trt = new trt(jsonPath); 43 | if(m_trt->param.createENG) 44 | { 45 | m_trt->createENG(); 46 | m_trt->param.outputSize = m_trt->getOutDim(); 47 | std::cout <<"Create engine..."<< std::endl; 48 | std::cout <<"outputSize=" << m_trt->getOutDim() << std::endl; 49 | // write json 50 | Json::Reader m_Reader; 51 | 
Json::Value root; 52 | ifstream fp; 53 | fp.open(jsonPath,ios::binary); 54 | m_Reader.parse(fp,root); 55 | 56 | Json::StyledWriter writer; 57 | root["outputSize"] = m_trt->getOutDim(); 58 | root["createENG"] = false; 59 | ofstream os; 60 | auto replacepos = jsonPath.find(".json"); 61 | string inferjsonPath = jsonPath.replace(replacepos, 5, "_infer.json"); 62 | os.open(inferjsonPath, ios::binary); 63 | os << writer.write(root); 64 | os.close(); 65 | fp.close(); 66 | 67 | return; 68 | } 69 | //m_trt->createENG(); 70 | int batchsize = m_trt->param.BatchSize; 71 | m_trt->inference_init(batchsize, m_trt->param.outputSize); 72 | 73 | vector testVal; 74 | map dataProb; 75 | vector imgs; 76 | cv::Mat img; 77 | string pattern = m_trt->param.imgDir+ "*."+m_trt->param.imgType; 78 | vector images_names; 79 | cv::glob(pattern, images_names, false); 80 | if(images_names.empty()) 81 | { 82 | std::cout << "No img files " << std::endl; 83 | return; 84 | } 85 | int i = 0; 86 | cv::Scalar Mean = cv::Scalar(m_trt->param.mean[0], m_trt->param.mean[1], m_trt->param.mean[2]); 87 | cv::Scalar Std = cv::Scalar(m_trt->param.std[0], m_trt->param.std[1], m_trt->param.std[2]); 88 | cv::Size size = { m_trt->param.input_h,m_trt->param.input_w }; 89 | cout << size << "batch size= " << batchsize << endl; 90 | int flag = 0; 91 | if (m_trt->param.input_c == 3) 92 | { 93 | flag = 1; 94 | } 95 | for (auto image_name : images_names) 96 | { 97 | if (i < batchsize) 98 | { 99 | i++; 100 | cv::Mat Img = cv::imread(image_name, flag); 101 | //resize(Img, Img, size, 0, 0, cv::INTER_LINEAR); 102 | if (flag == 1) 103 | { 104 | cv::Mat img; 105 | Img.convertTo(img, CV_32FC3, 1 / 255.0); 106 | testVal.push_back(img); 107 | } 108 | else { 109 | testVal.push_back(Img); 110 | } 111 | cout << image_name << endl; 112 | imgs.push_back(image_name); 113 | } 114 | } 115 | 116 | std::cout << "out dim : " << m_trt->param.outputSize << std::endl; 117 | float *data = new 
float[batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w]; 118 | int *output = new int[batchsize*m_trt->param.outputSize]; 119 | 120 | cv::Mat Transed_t = BlobFromImages(testVal, cv::Size{ m_trt->param.input_w,m_trt->param.input_h }, Mean, Std, true, false); 121 | //cout<param.input_h,m_trt->param.input_w},cv::Scalar{0}); 123 | memcpy(data, Transed_t.data, batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w * sizeof(float)); 124 | 125 | 126 | //m_trt->doInference(data, batchsize, output); // float 127 | std::cout<<"param.outputSize = " << m_trt->param.outputSize << std::endl; 128 | m_trt->doInference_int(data, batchsize, output); // float 129 | 130 | ////post 131 | cv::Mat outimg(m_trt->param.input_h, m_trt->param.input_w, CV_8UC1); 132 | for (int row = 0; row < m_trt->param.input_h; ++row) { 133 | uchar* uc_pixel = outimg.data + row * outimg.step; 134 | for (int col = 0; col < m_trt->param.input_w; ++col) { 135 | uc_pixel[col] = (uchar)output[row*m_trt->param.input_w + col]; 136 | } 137 | } 138 | cv::Mat im_color; 139 | cv::cvtColor(outimg, im_color, cv::COLOR_GRAY2RGB); 140 | cv::Mat lut = createLTU(19); // numclass 141 | cv::LUT(im_color, lut, im_color); 142 | // false color 143 | cv::cvtColor(im_color, im_color, cv::COLOR_RGB2GRAY); 144 | cv::applyColorMap(im_color, im_color, cv::COLORMAP_HOT); 145 | cv::imshow("False Color Map", im_color); 146 | //fusion 147 | //cv::Mat fusionImg; 148 | //cv::addWeighted(img, 1, im_color, 0.5, 1, fusionImg); 149 | //cv::imshow("Fusion Img", fusionImg); 150 | cv::waitKey(0); 151 | 152 | 153 | } 154 | 155 | int main(int argc ,char** argv) 156 | { 157 | //string JsonPath = argv[1]; 158 | //trt *m_trt = new trt(JsonPath); 159 | //m_trt->createENG(); 160 | //int batchsize = 1; 161 | //m_trt->inference_init(batchsize); 162 | //float *input = new float[batchsize * m_trt->param.input_c * m_trt->param.input_h * m_trt->param.input_w]; 163 | //float *output = new float[batchsize * 
m_trt->param.outputSize]; 164 | //m_trt->doInference(input,1,output); 165 | if(argc < 2) 166 | { 167 | std::cout << "Parameter error !" << std::endl; 168 | return -1; 169 | } 170 | HRnet(argv[1]); 171 | 172 | return 0; 173 | } 174 | -------------------------------------------------------------------------------- /example/hrnet/CMakeLists.txt: --------------------------------------------------------------------------------
# Demo executable "hrnet": main.cpp plus the shared TensorRT wrapper sources.
#
# OpenCV_LIBS and the TensorRT/CUDA include and link directories are not set
# here; this script expects the enclosing build to have provided them.

# The target lists .cu sources, which needs CMake's CUDA language (3.8+).
cmake_minimum_required(VERSION 3.8)

# Project name.
project(hrnet)

# Print the source roots in effect (top of the whole build / this project).
message(STATUS "========= ${CMAKE_SOURCE_DIR}")
message(STATUS "========= ${PROJECT_SOURCE_DIR}")

# The shared sources live in <repo>/src, two levels above this directory.
# Resolving them relative to this file keeps the script working when the
# repository is not the top-level CMake project.
set(TRT_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../src")

# Collect the shared headers, sources and CUDA plugins into TRT_FILES.
file(GLOB TRT_FILES
  "${TRT_SRC_DIR}/*.h"
  "${TRT_SRC_DIR}/*.cpp"
  "${TRT_SRC_DIR}/plugin/*.cu"
  "${TRT_SRC_DIR}/plugin/*.h"
  "${TRT_SRC_DIR}/plugin/*.cpp")

# Build the hrnet executable.
add_executable(hrnet main.cpp ${TRT_FILES})

# Header search path for the shared sources, scoped to this target only.
target_include_directories(hrnet PRIVATE "${TRT_SRC_DIR}")

# Link against the TensorRT ONNX parser, the TensorRT runtime, the CUDA
# runtime and OpenCV.
target_link_libraries(hrnet PRIVATE
  nvonnxparser
  nvinfer
  cudart
  ${OpenCV_LIBS})
-------------------------------------------------------------------------------- /example/hrnet/main.cpp: -------------------------------------------------------------------------------- 1 | #include "trt.h" 2 | #include "utils.h" 3 | #include "opencv2/opencv.hpp" 4 | 5 | cv::Mat createLTU(int len) { 6 | cv::Mat lookUpTable(1, 256, CV_8U); 7 | uchar* p = lookUpTable.data; 8 | for (int j = 0; j < 256; ++j) { 9 | p[j] = (j * (256 / len) > 255) ? 
uchar(255) : (uchar)(j * (256 / len)); 10 | } 11 | return lookUpTable; 12 | } 13 | 14 | void HRnet(std::string jsonPath) 15 | { 16 | //string jsonPath = "/mnt/f/LearningCode/hrnet/hrnet_w48.json"; 17 | trt *m_trt = new trt(jsonPath); 18 | if(m_trt->param.createENG) 19 | { 20 | m_trt->createENG(); 21 | } 22 | 23 | int batchsize = m_trt->param.BatchSize; 24 | m_trt->inference_init(batchsize); 25 | std::cout << "out dim : " << m_trt->param.outputSize << std::endl; 26 | vector testVal; 27 | map dataProb; 28 | vector imgs; 29 | cv::Mat img; 30 | string pattern = m_trt->param.imgDir+ "*."+m_trt->param.imgType; 31 | vector images_names; 32 | cv::glob(pattern, images_names, false); 33 | if(images_names.empty()) 34 | { 35 | std::cout << "No img files " << std::endl; 36 | return; 37 | } 38 | int i = 0; 39 | cout << size << "batch size= " << batchsize << endl; 40 | int flag = 0; 41 | if (m_trt->param.input_c == 3) 42 | { 43 | flag = 1; 44 | } 45 | float *data = new float[batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w]; 46 | int *output = new int[batchsize*m_trt->param.outputSize]; 47 | for (auto image_name : images_names) 48 | { 49 | if (i < batchsize) 50 | { 51 | i++; 52 | cv::Mat Img = cv::imread(image_name, flag); 53 | //resize(Img, Img, size, 0, 0, cv::INTER_LINEAR); 54 | if (flag == 1) 55 | { 56 | cv::Mat img; 57 | Img.convertTo(img, CV_32FC3); 58 | testVal.push_back(img); 59 | } 60 | else { 61 | testVal.push_back(Img); 62 | } 63 | cout << image_name << endl; 64 | imgs.push_back(image_name); 65 | } 66 | } 67 | 68 | cv::Mat Transed_t = cv::dnn::blobFromImages(testVal,1.0,cv::Size{m_trt->param.input_h,m_trt->param.input_w},cv::Scalar{0},true);//BGR -> RGB 69 | memcpy(data, Transed_t.data, batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w * sizeof(float)); 70 | 71 | //m_trt->doInference(data, batchsize, output); // float 72 | m_trt->doInference_int(data, batchsize, output); 73 | 74 | ////post 75 | cv::Mat 
outimg(m_trt->param.input_h, m_trt->param.input_w, CV_8UC1); 76 | for (int row = 0; row < m_trt->param.input_h; ++row) { 77 | uchar* uc_pixel = outimg.data + row * outimg.step; 78 | for (int col = 0; col < m_trt->param.input_w; ++col) { 79 | uc_pixel[col] = (uchar)output[row*m_trt->param.input_w + col]; 80 | } 81 | } 82 | cv::Mat im_color; 83 | cv::cvtColor(outimg, im_color, cv::COLOR_GRAY2RGB); 84 | cv::Mat lut = createLTU(19); // numclass 85 | cv::LUT(im_color, lut, im_color); 86 | // false color 87 | cv::cvtColor(im_color, im_color, cv::COLOR_RGB2GRAY); 88 | cv::applyColorMap(im_color, im_color, cv::COLORMAP_HOT); 89 | cv::imshow("False Color Map", im_color); 90 | //fusion 91 | //cv::Mat fusionImg; 92 | //cv::addWeighted(img, 1, im_color, 0.5, 1, fusionImg); 93 | //cv::imshow("Fusion Img", fusionImg); 94 | cv::waitKey(0); 95 | 96 | 97 | } 98 | 99 | int main(int argc ,char** argv) 100 | { 101 | if(argc < 2) 102 | { 103 | std::cout << "Parameter error !" << std::endl; 104 | return -1; 105 | } 106 | HRnet(argv[1]); 107 | 108 | return 0; 109 | } 110 | -------------------------------------------------------------------------------- /example/yolov5/CMakeLists.txt: --------------------------------------------------------------------------------
# Demo executable "yolov5": main.cpp plus the shared TensorRT wrapper sources.
#
# OpenCV_LIBS and the TensorRT/CUDA include and link directories are not set
# here; this script expects the enclosing build to have provided them.

# The target lists .cu sources, which needs CMake's CUDA language (3.8+).
cmake_minimum_required(VERSION 3.8)

# Project name.
project(yolov5)

# Print the source roots in effect (top of the whole build / this project).
message(STATUS "========= ${CMAKE_SOURCE_DIR}")
message(STATUS "========= ${PROJECT_SOURCE_DIR}")

# The shared sources live in <repo>/src, two levels above this directory.
# Resolving them relative to this file keeps the script working when the
# repository is not the top-level CMake project.
set(TRT_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../src")

# Collect the shared headers, sources and CUDA plugins into TRT_FILES.
file(GLOB TRT_FILES
  "${TRT_SRC_DIR}/*.h"
  "${TRT_SRC_DIR}/*.cpp"
  "${TRT_SRC_DIR}/plugin/*.cu"
  "${TRT_SRC_DIR}/plugin/*.h"
  "${TRT_SRC_DIR}/plugin/*.cpp")

# Build the yolov5 executable.
add_executable(yolov5 main.cpp ${TRT_FILES})

# Header search paths, scoped to this target only. The plugin directory is
# added as well because main.cpp includes "yololayer.h" without a path prefix.
target_include_directories(yolov5 PRIVATE
  "${TRT_SRC_DIR}"
  "${TRT_SRC_DIR}/plugin")

# Link against the TensorRT ONNX parser, the TensorRT runtime, the CUDA
# runtime and OpenCV.
target_link_libraries(yolov5 PRIVATE
  nvonnxparser
  nvinfer
  cudart
  ${OpenCV_LIBS})
-------------------------------------------------------------------------------- /example/yolov5/main.cpp: -------------------------------------------------------------------------------- 1 | #include "trt.h" 2 | #include "utils.h" 3 | #include "yololayer.h" 4 | #include 5 | #include 6 | //windows下 7 | #ifdef WIN32 8 | #include 9 | #include 10 | #endif 11 | //linux下 12 | #ifdef linux 13 | #include 14 | #include 15 | #include 16 | #include 17 | #endif 18 | 19 | cv::Rect get_rect(cv::Mat& img, float bbox[4], int& INPUT_W, int& INPUT_H) { 20 | int l, r, t, b; 21 | float r_w = INPUT_W / (img.cols * 1.0); 22 | float r_h = INPUT_H / (img.rows * 1.0); 23 | if (r_h > r_w) { 24 | l = bbox[0] - bbox[2] / 2.f; 25 | r = bbox[0] + bbox[2] / 2.f; 26 | t = bbox[1] - bbox[3] / 2.f - (INPUT_H - r_w * img.rows) / 2; 27 | b = bbox[1] + bbox[3] / 2.f - (INPUT_H - r_w * img.rows) / 2; 28 | l = l / r_w; 29 | r = r / r_w; 30 | t = t / r_w; 31 | b = b / r_w; 32 | } 33 | else { 34 | l = bbox[0] - bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2; 35 | r = bbox[0] + bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2; 36 | t = bbox[1] - bbox[3] / 2.f; 37 | b = bbox[1] + bbox[3] / 2.f; 38 | l = l / r_h; 39 | r = r / r_h; 40 | t = t / r_h; 41 | b = b / r_h; 42 | } 43 | return cv::Rect(l, t, r - l, b - t); 44 | } 45 | 46 | float iou(float lbox[4], float rbox[4]) { 47 | float interBox[] = { 48 | (std::max)(lbox[0] - lbox[2] / 2.f , rbox[0] - rbox[2] / 2.f), //left 49 | (std::min)(lbox[0] + lbox[2] / 2.f , rbox[0] + rbox[2] / 2.f), //right 50 | (std::max)(lbox[1] - lbox[3] / 2.f , rbox[1] - rbox[3] / 2.f), //top 51 | (std::min)(lbox[1] + lbox[3] / 2.f , rbox[1] + rbox[3] / 2.f), //bottom 52 | }; 53 | 54 | if (interBox[2] 
> interBox[3] || interBox[0] > interBox[1]) 55 | return 0.0f; 56 | 57 | float interBoxS = (interBox[1] - interBox[0]) * (interBox[3] - interBox[2]); 58 | return interBoxS / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - interBoxS); 59 | } 60 | 61 | bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) { 62 | return a.conf > b.conf; 63 | } 64 | 65 | void nms(std::vector& res, float* output, int& MAX_OUTPUT_BBOX_COUNT, float conf_thresh, float nms_thresh = 0.5) { 66 | int det_size = sizeof(Yolo::Detection) / sizeof(float); 67 | std::map> m; 68 | for (int i = 0; i < output[0] && i < MAX_OUTPUT_BBOX_COUNT; i++) { 69 | if (output[1 + det_size * i + 4] <= conf_thresh) continue; 70 | Yolo::Detection det; 71 | memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float)); 72 | if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector()); 73 | m[det.class_id].push_back(det); 74 | //cout<second[0].class_id << " --- " << std::endl; 78 | auto& dets = it->second; 79 | std::sort(dets.begin(), dets.end(), cmp); 80 | for (size_t m = 0; m < dets.size(); ++m) { 81 | auto& item = dets[m]; 82 | res.push_back(item); 83 | for (size_t n = m + 1; n < dets.size(); ++n) { 84 | if (iou(item.bbox, dets[n].bbox) > nms_thresh) { 85 | dets.erase(dets.begin() + n); 86 | --n; 87 | } 88 | } 89 | } 90 | } 91 | } 92 | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) { 93 | int w, h, x, y; 94 | float r_w = input_w / (img.cols * 1.0); 95 | float r_h = input_h / (img.rows * 1.0); 96 | if (r_h > r_w) { 97 | w = input_w; 98 | h = r_w * img.rows; 99 | x = 0; 100 | y = (input_h - h) / 2; 101 | } 102 | else { 103 | w = r_h * img.cols; 104 | h = input_h; 105 | x = (input_w - w) / 2; 106 | y = 0; 107 | } 108 | cv::Mat re(h, w, CV_8UC3); 109 | cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR); 110 | cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128)); 111 | re.copyTo(out(cv::Rect(x, y, re.cols, re.rows))); 112 | return out; 113 | } 114 | void 
yolo_img(trt* m_trt, int& batchsize) 115 | { 116 | vector testVal; 117 | vector imgs; 118 | string pattern = m_trt->param.imgDir + "*." + m_trt->param.imgType;//"D:/qt_project/tensorrtCV/model/yolo/*.jpg"; 119 | vector images_names; 120 | cv::glob(pattern, images_names, false); 121 | 122 | string outputPath = m_trt->param.imgDir + "output/"; 123 | #ifdef WIN32 124 | if (_access(outputPath.c_str(), 0) == -1) 125 | { 126 | _mkdir(outputPath.c_str()); 127 | } 128 | #endif 129 | #ifdef linux 130 | if (access(outputPath.c_str(), 0) == -1) 131 | { 132 | mkdir(outputPath.c_str(), 0777); 133 | } 134 | #endif 135 | int inputH = m_trt->param.input_h; 136 | int inputW = m_trt->param.input_w; 137 | int flag = 1; 138 | if (m_trt->param.input_c == 1) 139 | flag = 0; 140 | float* input = new float[batchsize * m_trt->param.input_c * inputH * inputW]; 141 | float* output = new float[batchsize * m_trt->param.outputSize]; 142 | int inferBatch = 0; 143 | if (images_names.size() == 0) 144 | { 145 | cout << "no " << m_trt->param.imgType << " in " << m_trt->param.imgDir << endl; 146 | return; 147 | } 148 | for (auto image_name : images_names) 149 | { 150 | cout << image_name << endl; 151 | cv::Mat Img = cv::imread(image_name, flag); 152 | cv::Mat img = preprocess_img(Img, inputW, inputH); 153 | testVal.push_back(img); 154 | imgs.push_back(image_name); 155 | inferBatch++; 156 | if (testVal.size() != batchsize && image_name != *(images_names.end() - 1)) 157 | { 158 | continue; 159 | } 160 | cout << testVal.size() << " " << inferBatch << endl; 161 | cv::Mat Data = cv::dnn::blobFromImages(testVal, 1.0, cv::Size{ inputH,inputW }, cv::Scalar{ 0 }, true);//BGR -> RGB 162 | 163 | memcpy(input, Data.data, inferBatch * m_trt->param.input_c * inputH * inputW * sizeof(float)); 164 | 165 | m_trt->doInference(input, inferBatch, output); 166 | 167 | std::vector> batch_res(inferBatch); 168 | int MAX_OUTPUT_BBOX_COUNT = (m_trt->param.outputSize - 1) / 6; 169 | 170 | float CONF_THRESH = 0.5; 171 | float 
NMS_THRESH = 0.5; 172 | 173 | for (int j = 0; j < inferBatch; j++) 174 | { 175 | auto& res = batch_res[j]; 176 | nms(res, &output[j * m_trt->param.outputSize], MAX_OUTPUT_BBOX_COUNT, CONF_THRESH, NMS_THRESH); 177 | } 178 | 179 | for (int b = 0; b < inferBatch; b++) { 180 | auto& res = batch_res[b]; 181 | // ofstream openfile("H:/myGitHub/tensorrtF/model/yolov5/test/trt_results.txt"); 182 | // for(int n=0;nparam.imgDir.size(), outputPath);//"D:/qt_project/tensorrtCV/model/test/1.jpg"; 199 | cv::imwrite(outPath, img); 200 | } 201 | imgs.clear(); 202 | testVal.clear(); 203 | batch_res.clear(); 204 | inferBatch = 0; 205 | } 206 | } 207 | 208 | void yolo_vedio(trt* m_trt, int& batchsize) 209 | { 210 | string vedios = m_trt->param.imgDir + "*" + m_trt->param.imgType;//"D:/qt_project/tensorrtCV/model/yolo/test.mp4"; 211 | vector vedios_names; 212 | cv::glob(vedios, vedios_names, false); 213 | string outputPath = m_trt->param.imgDir + "output/"; 214 | #ifdef WIN32 215 | if (_access(outputPath.c_str(), 0) == -1) 216 | { 217 | _mkdir(outputPath.c_str()); 218 | } 219 | #endif 220 | #ifdef linux 221 | if (access(outputPath.c_str(), 0) == -1) 222 | { 223 | mkdir(outputPath.c_str(), 0777); 224 | } 225 | #endif 226 | int inputH = m_trt->param.input_h; 227 | int inputW = m_trt->param.input_w; 228 | float* input = new float[batchsize * m_trt->param.input_c * inputH * inputW]; 229 | float* output = new float[batchsize * m_trt->param.outputSize]; 230 | for (auto vedio_name : vedios_names) 231 | { 232 | cv::VideoCapture capture(vedio_name); 233 | if (!capture.isOpened()) 234 | { 235 | cout << "can not open vedio file, please check it : " << vedio_name << endl; 236 | return; 237 | } 238 | cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT)); 239 | double fps = capture.get(cv::CAP_PROP_FPS); 240 | double allFps = capture.get(cv::CAP_PROP_FRAME_COUNT); 241 | cout << allFps << endl; 242 | cv::VideoWriter writer; 243 | string 
vedioOputPath = ((string)vedio_name).replace(0, m_trt->param.imgDir.size(), outputPath); 244 | vedioOputPath = vedioOputPath.substr(0, vedioOputPath.size() - 3); 245 | vedioOputPath = vedioOputPath + "avi"; 246 | cout << "vedio output Path :" << vedioOputPath << endl; 247 | writer.open(vedioOputPath, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), fps, size, true); 248 | if (!writer.isOpened()) 249 | { 250 | cout << "write vedio error! please check it : " << vedioOputPath << endl; 251 | return; 252 | } 253 | cv::Mat img; 254 | vector testVal; 255 | vector imgs; 256 | auto start_time = std::chrono::system_clock::now(); 257 | int numFps = 0; 258 | int inferBatch = 0; 259 | while (capture.read(img)) { 260 | imgs.push_back(img); 261 | cv::Mat Img = preprocess_img(img, inputW, inputH); 262 | testVal.push_back(Img); 263 | inferBatch++; 264 | numFps++; 265 | if (inferBatch != batchsize && numFps < allFps) 266 | { 267 | continue; 268 | } 269 | cv::Mat Data = cv::dnn::blobFromImages(testVal, 1.0, cv::Size{ inputH,inputW }, cv::Scalar{ 0 }, true); 270 | memcpy(input, Data.data, inferBatch * m_trt->param.input_c * inputH * inputW * sizeof(float)); 271 | 272 | m_trt->doInference(input, inferBatch, output); 273 | 274 | std::vector> batch_res(inferBatch); 275 | int MAX_OUTPUT_BBOX_COUNT = (m_trt->param.outputSize - 1) / 6; 276 | 277 | float CONF_THRESH = 0.5; 278 | float NMS_THRESH = 0.5; 279 | 280 | for (int j = 0; j < inferBatch; j++) 281 | { 282 | auto& res = batch_res[j]; 283 | nms(res, &output[j * m_trt->param.outputSize], MAX_OUTPUT_BBOX_COUNT, CONF_THRESH, NMS_THRESH); 284 | } 285 | 286 | for (int b = 0; b < inferBatch; b++) { 287 | auto& res = batch_res[b]; 288 | cv::Mat writeImg = imgs[b]; 289 | for (size_t j = 0; j < res.size(); j++) { 290 | cv::Rect r = get_rect(writeImg, res[j].bbox, inputW, inputH); 291 | cv::rectangle(writeImg, r, cv::Scalar(0x27, 0xC1, 0x36), 2); 292 | cv::putText(writeImg, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), 
cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0x00, 0x00), 2); 293 | } 294 | writer.write(writeImg); 295 | } 296 | 297 | imgs.clear(); 298 | testVal.clear(); 299 | batch_res.clear(); 300 | inferBatch = 0; 301 | 302 | } 303 | capture.release(); 304 | writer.release(); 305 | auto end_time = std::chrono::system_clock::now(); 306 | float duration; 307 | duration = std::chrono::duration_cast(end_time - start_time).count(); 308 | cout << "all time : " << duration << endl; 309 | } 310 | } 311 | int main() 312 | { 313 | string JsonPath = "D:/qt_project/tensorrtF/model/yolo/yolov5s.json"; 314 | trt* m_trt = new trt(JsonPath); 315 | if (m_trt->param.createENG) 316 | m_trt->createENG(); 317 | if (!m_trt->param.doInfer) 318 | return 0; 319 | int batchsize = m_trt->param.BatchSize; 320 | int outputSize = m_trt->param.outputSize; 321 | m_trt->inference_init(batchsize, outputSize); 322 | string imgType = "jpg png bmp jpeg"; 323 | string::size_type imgIdx = imgType.find(m_trt->param.imgType); 324 | if (imgIdx != string::npos) 325 | yolo_img(m_trt, batchsize); 326 | string vedioType = "mp4 avi"; 327 | string::size_type vedioIdx = vedioType.find(m_trt->param.imgType); 328 | if (vedioIdx != string::npos) 329 | yolo_vedio(m_trt, batchsize); 330 | return 0; 331 | } 332 | -------------------------------------------------------------------------------- /getWeights.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat May 29 19:12:58 2021 4 | 5 | @author: Administrator 6 | """ 7 | import xml.etree.ElementTree as ET 8 | import os 9 | from PIL import Image 10 | import numpy as np 11 | import torch 12 | 13 | from PIL import Image 14 | import torch 15 | import torchvision 16 | import struct 17 | 18 | 19 | 20 | 21 | path = "H:/myGitHub/tensorrtF/model/yolov5/" 22 | Path = os.path.join(path, "wts") 23 | if not os.path.isdir(Path): 24 | os.makedirs(Path) 25 | def getweights(model_path): 26 | state_dict = 
torch.load(model_path,map_location= lambda storage,loc :storage) 27 | print(state_dict ) 28 | keys = [v for key,v in enumerate(state_dict)] 29 | print(keys) 30 | with open(os.path.join(Path,"network.txt"),'w') as fw: 31 | for key in keys: 32 | print("~~~~~~~~~~~ ",key) 33 | ts = state_dict[key] 34 | shape = ts.shape 35 | size = shape 36 | allsize = 1 37 | fw.write(key + " ") 38 | for idx in range(len(size)): 39 | allsize *= size[idx] 40 | fw.write(str(size[idx])+ " ") 41 | fw.write('\n') 42 | ts = ts.reshape(allsize) 43 | with open(Path + '/'+ key + '.wgt','wb') as f: 44 | a = struct.pack('i',allsize) 45 | f.write(a) 46 | for i in range(allsize): 47 | a = struct.pack('f',ts[i])#.hex() 48 | f.write(a) 49 | 50 | 51 | 52 | if __name__ == '__main__': 53 | 54 | model = torch.load(path+'yolov5s.pt')['model'].float() 55 | torch.save(model.state_dict(),path+'yolov5s.pth') 56 | getweights(path + "yolov5s.pth") 57 | #model = torchvision.models.resnet50() 58 | #model.eval() 59 | #torch.save(model.state_dict(),r"H:\myGitHub\tensorrtF\model\resnet50\res50.pth") 60 | #a = torch.randn(1,3,256,256).type(torch.float32) 61 | #torch.onnx.export(model, a,r"H:\myGitHub\tensorrtF\model\resnet50\res50.onnx",training=2 ) -------------------------------------------------------------------------------- /model/ddrnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "BatchSize": 1, 3 | "maxBatchsize":1, 4 | "indim":3, 5 | "input_c": 3, 6 | "input_h": 512, 7 | "input_w": 1024, 8 | "ENGPath": "/mnt/f/LearningCode/ddrnet/ddrnetwsl.eng", 9 | "weightsDir": "/mnt/f/LearningCode/ddrnet/wts/", 10 | "int8": false, 11 | "fp16": true, 12 | "div_255": true, 13 | "Mean": [ 0.485, 0.456, 0.406 ], 14 | "Std": [ 0.229, 0.224, 0.225 ], 15 | "inputBlobName": "data", 16 | "outputBlobName": "output", 17 | "outputSize": 4194304, 18 | "createENG": true, 19 | "doInfer": false, 20 | "imgDir": "/mnt/f/dataset/oneimg/", 21 | "imgType": "png", 22 | "network": [ 23 | { 24 
| "layerStyle": "cba", 25 | "layerName": "relu_16", 26 | "inputName": "data", 27 | "convFile": "conv1.0", 28 | "biasFile":"conv1.0", 29 | "parameter": { 30 | "input_c": 3, 31 | "output_c": 32, 32 | "kernel": [ 3, 3 ], 33 | "padding": [ 1, 1 ], 34 | "stride": [ 2, 2 ] 35 | }, 36 | "withBN": true, 37 | "bnFile": "conv1.1", 38 | "active_type": "relu", 39 | "eps": 1e-5 40 | }, 41 | { 42 | "layerStyle": "cba", 43 | "layerName": "relu_19", 44 | "inputName": "relu_16", 45 | "convFile": "conv1.3", 46 | "biasFile":"conv1.3", 47 | "parameter": { 48 | "input_c": 32, 49 | "output_c": 32, 50 | "kernel": [ 3, 3 ], 51 | "padding": [ 1, 1 ], 52 | "stride": [ 2, 2 ] 53 | }, 54 | "withBN": true, 55 | "bnFile": "conv1.4", 56 | "active_type": "relu", 57 | "eps": 1e-5 58 | }, 59 | { 60 | "layerStyle": "resnet", 61 | "layerName": "relu_26", 62 | "inputName": "relu_19", 63 | "right":[ 64 | [ ["layer1.0.conv1", 32, 3, 1, 1], 65 | ["layer1.0.bn1"], 66 | ["relu"] 67 | ], 68 | [ 69 | ["layer1.0.conv2", 32, 3, 1, 1], 70 | ["layer1.0.bn2"] 71 | ] 72 | ], 73 | "active_type": "relu" 74 | }, 75 | { 76 | "layerStyle": "resnet", 77 | "layerName": "relu_33", 78 | "inputName": "relu_26", 79 | "right":[ 80 | [ 81 | ["layer1.1.conv1", 32, 3, 1, 1], 82 | ["layer1.1.bn1"], 83 | ["relu"] 84 | ], 85 | [ 86 | ["layer1.1.conv2", 32, 3, 1, 1], 87 | ["layer1.1.bn2"] 88 | ] 89 | ], 90 | "active_type": "relu" 91 | }, 92 | { 93 | "layerStyle": "resnet", 94 | "layerName": "relu_42", 95 | "inputName": "relu_33", 96 | "left":[ 97 | [ 98 | ["layer2.0.conv1", 64, 3, 2, 1], 99 | ["layer2.0.bn1"], 100 | ["relu"] 101 | ], 102 | [ 103 | ["layer2.0.conv2", 64, 3, 1, 1], 104 | ["layer2.0.bn2"] 105 | ] 106 | ], 107 | "right":[ 108 | [ 109 | ["layer2.0.downsample.0", 64, 1, 2, 0], 110 | ["layer2.0.downsample.1"] 111 | ] 112 | ], 113 | "active_type": "relu" 114 | }, 115 | { 116 | "layerStyle": "resnet", 117 | "layerName": "add_48", 118 | "inputName": "relu_42", 119 | "right":[ 120 | [ 121 | ["layer2.1.conv1", 64, 3, 1, 1], 122 
| ["layer2.1.bn1"], 123 | ["relu"] 124 | ], 125 | [ 126 | ["layer2.1.conv2", 64, 3, 1, 1], 127 | ["layer2.1.bn2"] 128 | ] 129 | ] 130 | }, 131 | { 132 | "layerStyle": "active", 133 | "layerName": "relu_65", 134 | "active_type": "relu", 135 | "inputName": "add_48" 136 | }, 137 | { 138 | "layerStyle": "resnet", 139 | "layerName": "relu_72", 140 | "inputName": "relu_65", 141 | "right":[ 142 | [ 143 | ["layer3_.0.conv1", 64, 3, 1, 1], 144 | ["layer3_.0.bn1"], 145 | ["relu"] 146 | ], 147 | [ 148 | ["layer3_.0.conv2", 64, 3, 1, 1], 149 | ["layer3_.0.bn2"] 150 | ] 151 | ], 152 | "active_type": "relu" 153 | }, 154 | { 155 | "layerStyle": "resnet", 156 | "layerName": "add_78", 157 | "inputName": "relu_72", 158 | "right":[ 159 | [ 160 | ["layer3_.1.conv1", 64, 3, 1, 1], 161 | ["layer3_.1.bn1"], 162 | ["relu"] 163 | ], 164 | [ 165 | ["layer3_.1.conv2", 64, 3, 1, 1], 166 | ["layer3_.1.bn2"] 167 | ] 168 | ] 169 | }, 170 | { 171 | "layerStyle": "active", 172 | "layerName": "relu_79", 173 | "active_type": "relu", 174 | "inputName": "add_78" 175 | }, 176 | { 177 | "layerStyle": "active", 178 | "layerName": "relu_49", 179 | "active_type": "relu", 180 | "inputName": "add_48" 181 | }, 182 | { 183 | "layerStyle": "resnet", 184 | "layerName": "relu_58", 185 | "inputName": "relu_49", 186 | "left":[ 187 | [ 188 | ["layer3.0.conv1", 128, 3, 2, 1], 189 | ["layer3.0.bn1"], 190 | ["relu"] 191 | ], 192 | [ 193 | ["layer3.0.conv2", 128, 3, 1, 1], 194 | ["layer3.0.bn2"] 195 | ] 196 | ], 197 | "right":[ 198 | [ 199 | ["layer3.0.downsample.0", 128, 1, 2, 0], 200 | ["layer3.0.downsample.1"] 201 | ] 202 | ], 203 | "active_type": "relu" 204 | }, 205 | { 206 | "layerStyle": "resnet", 207 | "layerName": "add_64", 208 | "inputName": "relu_58", 209 | "right":[ 210 | [ 211 | ["layer3.1.conv1", 128, 3, 1, 1], 212 | ["layer3.1.bn1"], 213 | ["relu"] 214 | ], 215 | [ 216 | ["layer3.1.conv2", 128, 3, 1, 1], 217 | ["layer3.1.bn2"] 218 | ] 219 | ] 220 | }, 221 | { 222 | "layerStyle": "cba", 223 | "layerName": 
"BatchNormalization_81", 224 | "inputName": "relu_79", 225 | "convFile": "down3.0", 226 | "parameter": { 227 | "input_c": 64, 228 | "output_c": 128, 229 | "kernel": [ 3, 3 ], 230 | "padding": [ 1, 1 ], 231 | "stride": [ 2, 2 ] 232 | }, 233 | "withBN": true, 234 | "bnFile": "down3.1" 235 | }, 236 | { 237 | "layerStyle": "eltwise", 238 | "layerName": "add_82", 239 | "inputName": [ "add_64", "BatchNormalization_81" ], 240 | "eltType": "kSUM" 241 | }, 242 | { 243 | "layerStyle": "active", 244 | "layerName": "relu_83", 245 | "active_type": "relu", 246 | "inputName": "add_64" 247 | }, 248 | { 249 | "layerStyle": "cba", 250 | "layerName": "BatchNormalization_85", 251 | "inputName": "relu_83", 252 | "convFile": "compression3.0", 253 | "parameter": { 254 | "input_c": 128, 255 | "output_c": 64, 256 | "kernel": [ 1, 1 ], 257 | "padding": [ 0, 0 ], 258 | "stride": [ 1, 1 ] 259 | }, 260 | "withBN": true, 261 | "bnFile": "compression3.1" 262 | }, 263 | { 264 | "layerStyle": "upsample", 265 | "layerName": "Upsample_96", 266 | "inputName": "BatchNormalization_85", 267 | "mode": "linear", 268 | "grid" : [64, 128] 269 | }, 270 | { 271 | "layerStyle": "eltwise", 272 | "layerName": "add_97", 273 | "inputName": [ "add_78", "Upsample_96" ], 274 | "eltType": "kSUM" 275 | }, 276 | { 277 | "layerStyle": "active", 278 | "layerName": "relu_114", 279 | "active_type": "relu", 280 | "inputName": "add_97" 281 | }, 282 | { 283 | "layerStyle": "active", 284 | "layerName": "relu_98", 285 | "active_type": "relu", 286 | "inputName": "add_82" 287 | }, 288 | { 289 | "layerStyle": "resnet", 290 | "layerName": "relu_107", 291 | "inputName": "relu_98", 292 | "left":[ 293 | [ 294 | ["layer4.0.conv1", 256, 3, 2, 1], 295 | ["layer4.0.bn1"], 296 | ["relu"] 297 | ], 298 | [ 299 | ["layer4.0.conv2", 256, 3, 1, 1], 300 | ["layer4.0.bn2"] 301 | ] 302 | ], 303 | "right":[ 304 | [ 305 | ["layer4.0.downsample.0", 256, 1, 2, 0], 306 | ["layer4.0.downsample.1"] 307 | ] 308 | ], 309 | "active_type": "relu" 310 | }, 311 
| { 312 | "layerStyle": "resnet", 313 | "layerName": "add_113", 314 | "inputName": "relu_107", 315 | "right":[ 316 | [ 317 | ["layer4.1.conv1", 256, 3, 1, 1], 318 | ["layer4.1.bn1"], 319 | ["relu"] 320 | ], 321 | [ 322 | ["layer4.1.conv2", 256, 3, 1, 1], 323 | ["layer4.1.bn2"] 324 | ] 325 | ] 326 | }, 327 | { 328 | "layerStyle": "active", 329 | "layerName": "relu_135", 330 | "active_type": "relu", 331 | "inputName": "add_113" 332 | }, 333 | { 334 | "layerStyle": "cba", 335 | "layerName": "BatchNormalization_137", 336 | "inputName": "relu_135", 337 | "convFile": "compression4.0", 338 | "parameter": { 339 | "input_c": 256, 340 | "output_c": 64, 341 | "kernel": [ 1, 1 ], 342 | "padding": [ 0, 0 ], 343 | "stride": [ 1, 1 ] 344 | }, 345 | "withBN": true, 346 | "bnFile": "compression4.1" 347 | }, 348 | { 349 | "layerStyle": "resnet", 350 | "layerName": "relu_121", 351 | "inputName": "relu_114", 352 | "right":[ 353 | [ 354 | ["layer4_.0.conv1", 64, 3, 1, 1], 355 | ["layer4_.0.bn1"], 356 | ["relu"] 357 | ], 358 | [ 359 | ["layer4_.0.conv2", 64, 3, 1, 1], 360 | ["layer4_.0.bn2"] 361 | ] 362 | ], 363 | "active_type": "relu" 364 | }, 365 | { 366 | "layerStyle": "resnet", 367 | "layerName": "add_127", 368 | "inputName": "relu_121", 369 | "right":[ 370 | [ 371 | ["layer4_.1.conv1", 64, 3, 1, 1], 372 | ["layer4_.1.bn1"], 373 | ["relu"] 374 | ], 375 | [ 376 | ["layer4_.1.conv2", 64, 3, 1, 1], 377 | ["layer4_.1.bn2"] 378 | ] 379 | ] 380 | }, 381 | { 382 | "layerStyle": "upsample", 383 | "layerName": "Upsample_148", 384 | "inputName": "BatchNormalization_137", 385 | "mode": "linear", 386 | "grid" : [64, 128] 387 | }, 388 | { 389 | "layerStyle": "eltwise", 390 | "layerName": "add_149", 391 | "inputName": [ "add_127", "Upsample_148" ], 392 | "eltType": "kSUM" 393 | }, 394 | { 395 | "layerStyle": "active", 396 | "layerName": "relu_150", 397 | "active_type": "relu", 398 | "inputName": "add_149" 399 | }, 400 | { 401 | "layerStyle": "resnet", 402 | "layerName": "add_161", 403 | 
"inputName": "relu_150", 404 | "left":[ 405 | [ 406 | ["layer5_.0.conv1", 64, 1, 1, 0], 407 | ["layer5_.0.bn1"], 408 | ["relu"] 409 | ], 410 | [ 411 | ["layer5_.0.conv2", 64, 3, 1, 1], 412 | ["layer5_.0.bn2"], 413 | ["relu"] 414 | ], 415 | [ 416 | ["layer5_.0.conv3", 128, 1, 1, 0], 417 | ["layer5_.0.bn3"] 418 | ] 419 | ], 420 | "right":[ 421 | [ 422 | ["layer5_.0.downsample.0", 128, 1, 1, 0], 423 | ["layer5_.0.downsample.1"] 424 | ] 425 | ] 426 | }, 427 | { 428 | "layerStyle": "active", 429 | "layerName": "relu_128", 430 | "active_type": "relu", 431 | "inputName": "add_127" 432 | }, 433 | { 434 | "layerStyle": "cba", 435 | "layerName": "relu_131", 436 | "inputName": "relu_128", 437 | "convFile": "down4.0", 438 | "parameter": { 439 | "input_c": 64, 440 | "output_c": 128, 441 | "kernel": [ 3, 3 ], 442 | "padding": [ 1, 1 ], 443 | "stride": [ 2, 2 ] 444 | }, 445 | "withBN": true, 446 | "bnFile": "down4.1", 447 | "active_type": "relu", 448 | "eps": 1e-5 449 | }, 450 | { 451 | "layerStyle": "cba", 452 | "layerName": "BatchNormalization_133", 453 | "inputName": "relu_131", 454 | "convFile": "down4.3", 455 | "parameter": { 456 | "input_c": 128, 457 | "output_c": 256, 458 | "kernel": [ 3, 3 ], 459 | "padding": [ 1, 1 ], 460 | "stride": [ 2, 2 ] 461 | }, 462 | "withBN": true, 463 | "bnFile": "down4.4" 464 | }, 465 | { 466 | "layerStyle": "eltwise", 467 | "layerName": "add_134", 468 | "inputName": [ "add_113", "BatchNormalization_133" ], 469 | "eltType": "kSUM" 470 | }, 471 | { 472 | "layerStyle": "active", 473 | "layerName": "relu_162", 474 | "active_type": "relu", 475 | "inputName": "add_134" 476 | }, 477 | { 478 | "layerStyle": "resnet", 479 | "layerName": "add_173", 480 | "inputName": "relu_162", 481 | "left":[ 482 | [ 483 | ["layer5.0.conv1", 256, 1, 1, 0], 484 | ["layer5.0.bn1"], 485 | ["relu"] 486 | ], 487 | [ 488 | ["layer5.0.conv2", 256, 3, 2, 1], 489 | ["layer5.0.bn2"], 490 | ["relu"] 491 | ], 492 | [ 493 | ["layer5.0.conv3", 512, 1, 1, 0], 494 | ["layer5.0.bn3"] 
495 | ] 496 | ], 497 | "right":[ 498 | [ 499 | ["layer5.0.downsample.0", 512, 1, 2, 0], 500 | ["layer5.0.downsample.1"] 501 | ] 502 | ] 503 | }, 504 | { 505 | "layerStyle": "bn", 506 | "layerName": "BatchNormalization_180", 507 | "inputName": "add_173", 508 | "weightFile": "spp.scale0.0" 509 | }, 510 | { 511 | "layerStyle": "active", 512 | "layerName": "relu_181", 513 | "active_type": "relu", 514 | "inputName": "BatchNormalization_180" 515 | }, 516 | { 517 | "layerStyle": "conv", 518 | "layerName": "conv_182", 519 | "inputName": "relu_181", 520 | "weightFile": "spp.scale0.2", 521 | "parameter": { 522 | "input_c": 512, 523 | "output_c": 128, 524 | "kernel": [ 1, 1 ], 525 | "padding": [ 0, 0 ], 526 | "stride": [ 1, 1 ] 527 | } 528 | }, 529 | { 530 | "layerStyle": "pool", 531 | "layerName": "AveragePool_184", 532 | "inputName": "add_173", 533 | "parameter": { 534 | "poolType": "kAVG", 535 | "kernel": [ 5, 5 ], 536 | "padding": [ 2, 2 ], 537 | "stride": [ 2, 2 ] 538 | } 539 | }, 540 | { 541 | "layerStyle": "bn", 542 | "layerName": "BatchNormalization_185", 543 | "inputName": "AveragePool_184", 544 | "weightFile": "spp.scale1.1" 545 | }, 546 | { 547 | 548 | "layerStyle": "active", 549 | "layerName": "relu_186", 550 | "active_type": "relu", 551 | "inputName": "BatchNormalization_185" 552 | }, 553 | { 554 | "layerStyle": "conv", 555 | "layerName": "conv_187", 556 | "inputName": "relu_186", 557 | "weightFile": "spp.scale1.3", 558 | "parameter": { 559 | "input_c": 512, 560 | "output_c": 128, 561 | "kernel": [ 1, 1 ], 562 | "padding": [ 0, 0 ], 563 | "stride": [ 1, 1 ] 564 | } 565 | }, 566 | { 567 | "layerStyle": "upsample", 568 | "layerName": "Upsample_198", 569 | "inputName": "conv_187", 570 | "mode": "linear", 571 | "grid" : [8, 16] 572 | }, 573 | { 574 | "layerStyle": "eltwise", 575 | "layerName": "add_199", 576 | "inputName": [ "conv_182", "Upsample_198" ], 577 | "eltType": "kSUM" 578 | }, 579 | { 580 | "layerStyle": "pool", 581 | "layerName": "AveragePool_204", 582 | 
"inputName": "add_173", 583 | "parameter": { 584 | "poolType": "kAVG", 585 | "kernel": [ 9, 9 ], 586 | "padding": [ 4, 4 ], 587 | "stride": [ 4, 4 ] 588 | } 589 | }, 590 | { 591 | "layerStyle": "bn", 592 | "layerName": "BatchNormalization_205", 593 | "inputName": "AveragePool_204", 594 | "weightFile": "spp.scale2.1" 595 | }, 596 | { 597 | "layerStyle": "active", 598 | "layerName": "relu_206", 599 | "active_type": "relu", 600 | "inputName": "BatchNormalization_205" 601 | }, 602 | { 603 | "layerStyle": "conv", 604 | "layerName": "conv_207", 605 | "inputName": "relu_206", 606 | "weightFile": "spp.scale2.3", 607 | "parameter": { 608 | "input_c": 512, 609 | "output_c": 128, 610 | "kernel": [ 1, 1 ], 611 | "padding": [ 0, 0 ], 612 | "stride": [ 1, 1 ] 613 | } 614 | }, 615 | { 616 | "layerStyle": "upsample", 617 | "layerName": "Upsample_218", 618 | "inputName": "conv_207", 619 | "mode": "linear", 620 | "grid" : [8, 16] 621 | }, 622 | { 623 | "layerStyle": "pool", 624 | "layerName": "AveragePool_224", 625 | "inputName": "add_173", 626 | "parameter": { 627 | "poolType": "kAVG", 628 | "kernel": [ 17, 17], 629 | "padding": [ 8, 8 ], 630 | "stride": [ 8, 8 ] 631 | } 632 | }, 633 | { 634 | 635 | "layerStyle": "bn", 636 | "layerName": "BatchNormalization_225", 637 | "inputName": "AveragePool_224", 638 | "weightFile": "spp.scale3.1" 639 | }, 640 | { 641 | "layerStyle": "active", 642 | "layerName": "relu_226", 643 | "active_type": "relu", 644 | "inputName": "BatchNormalization_225" 645 | }, 646 | { 647 | "layerStyle": "conv", 648 | "layerName": "conv_227", 649 | "inputName": "relu_226", 650 | "weightFile": "spp.scale3.3", 651 | "parameter": { 652 | "input_c": 512, 653 | "output_c": 128, 654 | "kernel": [ 1, 1 ], 655 | "padding": [ 0, 0 ], 656 | "stride": [ 1, 1 ] 657 | } 658 | }, 659 | { 660 | "layerStyle": "upsample", 661 | "layerName": "Upsample_238", 662 | "inputName": "conv_227", 663 | "mode": "linear", 664 | "grid" : [8, 16] 665 | }, 666 | { 667 | "layerStyle": "reduce", 668 
| "layerName": "GlobalAveragePool_243", 669 | "inputName": "add_173", 670 | "axes": 6, 671 | "keepD": true 672 | }, 673 | { 674 | "layerStyle": "bn", 675 | "layerName": "BatchNormalization_244", 676 | "inputName": "GlobalAveragePool_243", 677 | "weightFile": "spp.scale4.1" 678 | }, 679 | { 680 | "layerStyle": "active", 681 | "layerName": "relu_245", 682 | "active_type": "relu", 683 | "inputName": "BatchNormalization_244" 684 | }, 685 | { 686 | "layerStyle": "conv", 687 | "layerName": "conv_246", 688 | "inputName": "relu_245", 689 | "weightFile": "spp.scale4.3", 690 | "parameter": { 691 | "input_c": 512, 692 | "output_c": 128, 693 | "kernel": [ 1, 1 ], 694 | "padding": [ 0, 0 ], 695 | "stride": [ 1, 1 ] 696 | } 697 | }, 698 | { 699 | "layerStyle": "upsample", 700 | "layerName": "Upsample_257", 701 | "inputName": "conv_246", 702 | "mode": "linear", 703 | "grid" : [8, 16] 704 | }, 705 | { 706 | "layerStyle": "bn", 707 | "layerName": "BatchNormalization_200", 708 | "inputName": "add_199", 709 | "weightFile": "spp.process1.0" 710 | }, 711 | { 712 | "layerStyle": "active", 713 | "layerName": "relu_201", 714 | "active_type": "relu", 715 | "inputName": "BatchNormalization_200" 716 | }, 717 | { 718 | "layerStyle": "conv", 719 | "layerName": "conv_202", 720 | "inputName": "relu_201", 721 | "weightFile": "spp.process1.2", 722 | "parameter": { 723 | "input_c": 128, 724 | "output_c": 128, 725 | "kernel": [ 3, 3 ], 726 | "padding": [ 1, 1 ], 727 | "stride": [ 1, 1 ] 728 | } 729 | }, 730 | { 731 | "layerStyle": "eltwise", 732 | "layerName": "add_219", 733 | "inputName": [ "conv_202", "Upsample_218" ], 734 | "eltType": "kSUM" 735 | }, 736 | { 737 | "layerStyle": "bn", 738 | "layerName": "BatchNormalization_220", 739 | "inputName": "add_219", 740 | "weightFile": "spp.process2.0" 741 | }, 742 | { 743 | "layerStyle": "active", 744 | "layerName": "relu_221", 745 | "active_type": "relu", 746 | "inputName": "BatchNormalization_220" 747 | }, 748 | { 749 | "layerStyle": "conv", 750 | 
"layerName": "conv_222", 751 | "inputName": "relu_221", 752 | "weightFile": "spp.process2.2", 753 | "parameter": { 754 | "input_c": 128, 755 | "output_c": 128, 756 | "kernel": [ 3, 3 ], 757 | "padding": [ 1, 1 ], 758 | "stride": [ 1, 1 ] 759 | } 760 | }, 761 | { 762 | "layerStyle": "eltwise", 763 | "layerName": "add_239", 764 | "inputName": [ "conv_222", "Upsample_238" ], 765 | "eltType": "kSUM" 766 | }, 767 | { 768 | "layerStyle": "bn", 769 | "layerName": "BatchNormalization_240", 770 | "inputName": "add_239", 771 | "weightFile": "spp.process3.0" 772 | }, 773 | { 774 | "layerStyle": "active", 775 | "layerName": "relu_241", 776 | "active_type": "relu", 777 | "inputName": "BatchNormalization_240" 778 | }, 779 | { 780 | "layerStyle": "conv", 781 | "layerName": "conv_242", 782 | "inputName": "relu_241", 783 | "weightFile": "spp.process3.2", 784 | "parameter": { 785 | "input_c": 128, 786 | "output_c": 128, 787 | "kernel": [ 3, 3 ], 788 | "padding": [ 1, 1 ], 789 | "stride": [ 1, 1 ] 790 | } 791 | }, 792 | { 793 | "layerStyle": "eltwise", 794 | "layerName": "add_258", 795 | "inputName": [ "conv_242", "Upsample_257" ], 796 | "eltType": "kSUM" 797 | }, 798 | { 799 | "layerStyle": "bn", 800 | "layerName": "BatchNormalization_259", 801 | "inputName": "add_258", 802 | "weightFile": "spp.process4.0" 803 | }, 804 | { 805 | "layerStyle": "active", 806 | "layerName": "relu_260", 807 | "active_type": "relu", 808 | "inputName": "BatchNormalization_259" 809 | }, 810 | { 811 | "layerStyle": "conv", 812 | "layerName": "conv_261", 813 | "inputName": "relu_260", 814 | "weightFile": "spp.process4.2", 815 | "parameter": { 816 | "input_c": 128, 817 | "output_c": 128, 818 | "kernel": [ 3, 3 ], 819 | "padding": [ 1, 1 ], 820 | "stride": [ 1, 1 ] 821 | } 822 | }, 823 | { 824 | "layerStyle": "concat", 825 | "layerName": "Concat_262", 826 | "axis": 0, 827 | "inputName": [ "conv_182", "conv_202", "conv_222", "conv_242", "conv_261" ] 828 | }, 829 | { 830 | 831 | "layerStyle": "bn", 832 | 
"layerName": "BatchNormalization_263", 833 | "inputName": "Concat_262", 834 | "weightFile": "spp.compression.0" 835 | }, 836 | { 837 | "layerStyle": "active", 838 | "layerName": "relu_264", 839 | "active_type": "relu", 840 | "inputName": "BatchNormalization_263" 841 | }, 842 | { 843 | "layerStyle": "conv", 844 | "layerName": "conv_265", 845 | "inputName": "relu_264", 846 | "weightFile": "spp.compression.2", 847 | "parameter": { 848 | "input_c": 640, 849 | "output_c": 128, 850 | "kernel": [ 1, 1 ], 851 | "padding": [ 0, 0 ], 852 | "stride": [ 1, 1 ] 853 | } 854 | }, 855 | { 856 | "layerStyle": "bn", 857 | "layerName": "BatchNormalization_266", 858 | "inputName": "add_173", 859 | "weightFile": "spp.shortcut.0" 860 | }, 861 | { 862 | "layerStyle": "active", 863 | "layerName": "relu_267", 864 | "active_type": "relu", 865 | "inputName": "BatchNormalization_266" 866 | }, 867 | { 868 | "layerStyle": "conv", 869 | "layerName": "conv_268", 870 | "inputName": "relu_267", 871 | "weightFile": "spp.shortcut.2", 872 | "parameter": { 873 | "input_c": 512, 874 | "output_c": 128, 875 | "kernel": [ 1, 1 ], 876 | "padding": [ 0, 0 ], 877 | "stride": [ 1, 1 ] 878 | } 879 | }, 880 | { 881 | "layerStyle": "eltwise", 882 | "layerName": "add_269", 883 | "inputName": [ "conv_265", "conv_268" ], 884 | "eltType": "kSUM" 885 | }, 886 | { 887 | "layerStyle": "upsample", 888 | "layerName": "Upsample_280", 889 | "inputName": "add_269", 890 | "mode": "linear", 891 | "grid" : [64, 128] 892 | }, 893 | { 894 | "layerStyle": "eltwise", 895 | "layerName": "add_281", 896 | "inputName": [ "add_161", "Upsample_280" ], 897 | "eltType": "kSUM" 898 | }, 899 | { 900 | "layerStyle": "bn", 901 | "layerName": "BatchNormalization_282", 902 | "inputName": "add_281", 903 | "weightFile": "final_layer.bn1" 904 | }, 905 | { 906 | "layerStyle": "active", 907 | "layerName": "relu_283", 908 | "active_type": "relu", 909 | "inputName": "BatchNormalization_282" 910 | }, 911 | { 912 | "layerStyle": "conv", 913 | 
"layerName": "conv_284", 914 | "inputName": "relu_283", 915 | "weightFile": "final_layer.conv1", 916 | "parameter": { 917 | "input_c": 128, 918 | "output_c": 64, 919 | "kernel": [ 3, 3 ], 920 | "padding": [ 1, 1 ], 921 | "stride": [ 1, 1 ] 922 | } 923 | }, 924 | { 925 | "layerStyle": "bn", 926 | "layerName": "BatchNormalization_285", 927 | "inputName": "conv_284", 928 | "weightFile": "final_layer.bn2" 929 | }, 930 | { 931 | "layerStyle": "active", 932 | "layerName": "relu_286", 933 | "active_type": "relu", 934 | "inputName": "BatchNormalization_285" 935 | }, 936 | { 937 | "layerStyle": "conv", 938 | "layerName": "conv_287", 939 | "inputName": "relu_286", 940 | "weightFile": "final_layer.conv2", 941 | "parameter": { 942 | "input_c": 64, 943 | "output_c": 19, 944 | "kernel": [ 1, 1 ], 945 | "padding": [ 0, 0 ], 946 | "stride": [ 1, 1 ] 947 | } 948 | }, 949 | { 950 | "layerStyle": "upsample", 951 | "layerName": "Upsample_final", 952 | "inputName": "conv_287", 953 | "mode": "linear", 954 | "grid" : [512, 1024] 955 | }, 956 | { 957 | "outputName": "output", 958 | "layerStyle": "topk", 959 | "layerName": "top_max", 960 | "inputName": "Upsample_final", 961 | "TopKOperation": "kMAX", 962 | "k": 1, 963 | "reduceAxes": 1, 964 | "outputIndex": 1 965 | } 966 | ] 967 | 968 | } 969 | -------------------------------------------------------------------------------- /model/ddrnet/ddrnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "BatchSize": 1, 3 | "maxBatchsize":1, 4 | "indim":3, 5 | "input_c": 3, 6 | "input_h": 512, 7 | "input_w": 1024, 8 | "ENGPath": "/mnt/f/LearningCode/ddrnet/ddrnetwsl.eng", 9 | "weightsDir": "/mnt/f/LearningCode/ddrnet/wts/", 10 | "int8": false, 11 | "fp16": true, 12 | "div_255": true, 13 | "Mean": [ 0.485, 0.456, 0.406 ], 14 | "Std": [ 0.229, 0.224, 0.225 ], 15 | "inputBlobName": "data", 16 | "outputBlobName": "output", 17 | "outputSize": 4194304, 18 | "createENG": true, 19 | "doInfer": false, 20 | 
"imgDir": "/mnt/f/dataset/oneimg/", 21 | "imgType": "png", 22 | "network": [ 23 | { 24 | "layerStyle": "cba", 25 | "layerName": "relu_16", 26 | "inputName": "data", 27 | "convFile": "conv1.0", 28 | "biasFile":"conv1.0", 29 | "parameter": { 30 | "input_c": 3, 31 | "output_c": 32, 32 | "kernel": [ 3, 3 ], 33 | "padding": [ 1, 1 ], 34 | "stride": [ 2, 2 ] 35 | }, 36 | "withBN": true, 37 | "bnFile": "conv1.1", 38 | "active_type": "relu", 39 | "eps": 1e-5 40 | }, 41 | { 42 | "layerStyle": "cba", 43 | "layerName": "relu_19", 44 | "inputName": "relu_16", 45 | "convFile": "conv1.3", 46 | "biasFile":"conv1.3", 47 | "parameter": { 48 | "input_c": 32, 49 | "output_c": 32, 50 | "kernel": [ 3, 3 ], 51 | "padding": [ 1, 1 ], 52 | "stride": [ 2, 2 ] 53 | }, 54 | "withBN": true, 55 | "bnFile": "conv1.4", 56 | "active_type": "relu", 57 | "eps": 1e-5 58 | }, 59 | { 60 | "layerStyle": "resnet", 61 | "layerName": "relu_26", 62 | "inputName": "relu_19", 63 | "right":[ 64 | [ ["layer1.0.conv1", 32, 3, 1, 1], 65 | ["layer1.0.bn1"], 66 | ["relu"] 67 | ], 68 | [ 69 | ["layer1.0.conv2", 32, 3, 1, 1], 70 | ["layer1.0.bn2"] 71 | ] 72 | ], 73 | "active_type": "relu" 74 | }, 75 | { 76 | "layerStyle": "resnet", 77 | "layerName": "relu_33", 78 | "inputName": "relu_26", 79 | "right":[ 80 | [ 81 | ["layer1.1.conv1", 32, 3, 1, 1], 82 | ["layer1.1.bn1"], 83 | ["relu"] 84 | ], 85 | [ 86 | ["layer1.1.conv2", 32, 3, 1, 1], 87 | ["layer1.1.bn2"] 88 | ] 89 | ], 90 | "active_type": "relu" 91 | }, 92 | { 93 | "layerStyle": "resnet", 94 | "layerName": "relu_42", 95 | "inputName": "relu_33", 96 | "left":[ 97 | [ 98 | ["layer2.0.conv1", 64, 3, 2, 1], 99 | ["layer2.0.bn1"], 100 | ["relu"] 101 | ], 102 | [ 103 | ["layer2.0.conv2", 64, 3, 1, 1], 104 | ["layer2.0.bn2"] 105 | ] 106 | ], 107 | "right":[ 108 | [ 109 | ["layer2.0.downsample.0", 64, 1, 2, 0], 110 | ["layer2.0.downsample.1"] 111 | ] 112 | ], 113 | "active_type": "relu" 114 | }, 115 | { 116 | "layerStyle": "resnet", 117 | "layerName": "add_48", 118 | 
"inputName": "relu_42", 119 | "right":[ 120 | [ 121 | ["layer2.1.conv1", 64, 3, 1, 1], 122 | ["layer2.1.bn1"], 123 | ["relu"] 124 | ], 125 | [ 126 | ["layer2.1.conv2", 64, 3, 1, 1], 127 | ["layer2.1.bn2"] 128 | ] 129 | ] 130 | }, 131 | { 132 | "layerStyle": "active", 133 | "layerName": "relu_65", 134 | "active_type": "relu", 135 | "inputName": "add_48" 136 | }, 137 | { 138 | "layerStyle": "resnet", 139 | "layerName": "relu_72", 140 | "inputName": "relu_65", 141 | "right":[ 142 | [ 143 | ["layer3_.0.conv1", 64, 3, 1, 1], 144 | ["layer3_.0.bn1"], 145 | ["relu"] 146 | ], 147 | [ 148 | ["layer3_.0.conv2", 64, 3, 1, 1], 149 | ["layer3_.0.bn2"] 150 | ] 151 | ], 152 | "active_type": "relu" 153 | }, 154 | { 155 | "layerStyle": "resnet", 156 | "layerName": "add_78", 157 | "inputName": "relu_72", 158 | "right":[ 159 | [ 160 | ["layer3_.1.conv1", 64, 3, 1, 1], 161 | ["layer3_.1.bn1"], 162 | ["relu"] 163 | ], 164 | [ 165 | ["layer3_.1.conv2", 64, 3, 1, 1], 166 | ["layer3_.1.bn2"] 167 | ] 168 | ] 169 | }, 170 | { 171 | "layerStyle": "active", 172 | "layerName": "relu_79", 173 | "active_type": "relu", 174 | "inputName": "add_78" 175 | }, 176 | { 177 | "layerStyle": "active", 178 | "layerName": "relu_49", 179 | "active_type": "relu", 180 | "inputName": "add_48" 181 | }, 182 | { 183 | "layerStyle": "resnet", 184 | "layerName": "relu_58", 185 | "inputName": "relu_49", 186 | "left":[ 187 | [ 188 | ["layer3.0.conv1", 128, 3, 2, 1], 189 | ["layer3.0.bn1"], 190 | ["relu"] 191 | ], 192 | [ 193 | ["layer3.0.conv2", 128, 3, 1, 1], 194 | ["layer3.0.bn2"] 195 | ] 196 | ], 197 | "right":[ 198 | [ 199 | ["layer3.0.downsample.0", 128, 1, 2, 0], 200 | ["layer3.0.downsample.1"] 201 | ] 202 | ], 203 | "active_type": "relu" 204 | }, 205 | { 206 | "layerStyle": "resnet", 207 | "layerName": "add_64", 208 | "inputName": "relu_58", 209 | "right":[ 210 | [ 211 | ["layer3.1.conv1", 128, 3, 1, 1], 212 | ["layer3.1.bn1"], 213 | ["relu"] 214 | ], 215 | [ 216 | ["layer3.1.conv2", 128, 3, 1, 1], 217 | 
["layer3.1.bn2"] 218 | ] 219 | ] 220 | }, 221 | { 222 | "layerStyle": "cba", 223 | "layerName": "BatchNormalization_81", 224 | "inputName": "relu_79", 225 | "convFile": "down3.0", 226 | "parameter": { 227 | "input_c": 64, 228 | "output_c": 128, 229 | "kernel": [ 3, 3 ], 230 | "padding": [ 1, 1 ], 231 | "stride": [ 2, 2 ] 232 | }, 233 | "withBN": true, 234 | "bnFile": "down3.1" 235 | }, 236 | { 237 | "layerStyle": "eltwise", 238 | "layerName": "add_82", 239 | "inputName": [ "add_64", "BatchNormalization_81" ], 240 | "eltType": "kSUM" 241 | }, 242 | { 243 | "layerStyle": "active", 244 | "layerName": "relu_83", 245 | "active_type": "relu", 246 | "inputName": "add_64" 247 | }, 248 | { 249 | "layerStyle": "cba", 250 | "layerName": "BatchNormalization_85", 251 | "inputName": "relu_83", 252 | "convFile": "compression3.0", 253 | "parameter": { 254 | "input_c": 128, 255 | "output_c": 64, 256 | "kernel": [ 1, 1 ], 257 | "padding": [ 0, 0 ], 258 | "stride": [ 1, 1 ] 259 | }, 260 | "withBN": true, 261 | "bnFile": "compression3.1" 262 | }, 263 | { 264 | "layerStyle": "upsample", 265 | "layerName": "Upsample_96", 266 | "inputName": "BatchNormalization_85", 267 | "mode": "linear", 268 | "grid" : [64, 128] 269 | }, 270 | { 271 | "layerStyle": "eltwise", 272 | "layerName": "add_97", 273 | "inputName": [ "add_78", "Upsample_96" ], 274 | "eltType": "kSUM" 275 | }, 276 | { 277 | "layerStyle": "active", 278 | "layerName": "relu_114", 279 | "active_type": "relu", 280 | "inputName": "add_97" 281 | }, 282 | { 283 | "layerStyle": "active", 284 | "layerName": "relu_98", 285 | "active_type": "relu", 286 | "inputName": "add_82" 287 | }, 288 | { 289 | "layerStyle": "resnet", 290 | "layerName": "relu_107", 291 | "inputName": "relu_98", 292 | "left":[ 293 | [ 294 | ["layer4.0.conv1", 256, 3, 2, 1], 295 | ["layer4.0.bn1"], 296 | ["relu"] 297 | ], 298 | [ 299 | ["layer4.0.conv2", 256, 3, 1, 1], 300 | ["layer4.0.bn2"] 301 | ] 302 | ], 303 | "right":[ 304 | [ 305 | ["layer4.0.downsample.0", 256, 1, 
2, 0], 306 | ["layer4.0.downsample.1"] 307 | ] 308 | ], 309 | "active_type": "relu" 310 | }, 311 | { 312 | "layerStyle": "resnet", 313 | "layerName": "add_113", 314 | "inputName": "relu_107", 315 | "right":[ 316 | [ 317 | ["layer4.1.conv1", 256, 3, 1, 1], 318 | ["layer4.1.bn1"], 319 | ["relu"] 320 | ], 321 | [ 322 | ["layer4.1.conv2", 256, 3, 1, 1], 323 | ["layer4.1.bn2"] 324 | ] 325 | ] 326 | }, 327 | { 328 | "layerStyle": "active", 329 | "layerName": "relu_135", 330 | "active_type": "relu", 331 | "inputName": "add_113" 332 | }, 333 | { 334 | "layerStyle": "cba", 335 | "layerName": "BatchNormalization_137", 336 | "inputName": "relu_135", 337 | "convFile": "compression4.0", 338 | "parameter": { 339 | "input_c": 256, 340 | "output_c": 64, 341 | "kernel": [ 1, 1 ], 342 | "padding": [ 0, 0 ], 343 | "stride": [ 1, 1 ] 344 | }, 345 | "withBN": true, 346 | "bnFile": "compression4.1" 347 | }, 348 | { 349 | "layerStyle": "resnet", 350 | "layerName": "relu_121", 351 | "inputName": "relu_114", 352 | "right":[ 353 | [ 354 | ["layer4_.0.conv1", 64, 3, 1, 1], 355 | ["layer4_.0.bn1"], 356 | ["relu"] 357 | ], 358 | [ 359 | ["layer4_.0.conv2", 64, 3, 1, 1], 360 | ["layer4_.0.bn2"] 361 | ] 362 | ], 363 | "active_type": "relu" 364 | }, 365 | { 366 | "layerStyle": "resnet", 367 | "layerName": "add_127", 368 | "inputName": "relu_121", 369 | "right":[ 370 | [ 371 | ["layer4_.1.conv1", 64, 3, 1, 1], 372 | ["layer4_.1.bn1"], 373 | ["relu"] 374 | ], 375 | [ 376 | ["layer4_.1.conv2", 64, 3, 1, 1], 377 | ["layer4_.1.bn2"] 378 | ] 379 | ] 380 | }, 381 | { 382 | "layerStyle": "upsample", 383 | "layerName": "Upsample_148", 384 | "inputName": "BatchNormalization_137", 385 | "mode": "linear", 386 | "grid" : [64, 128] 387 | }, 388 | { 389 | "layerStyle": "eltwise", 390 | "layerName": "add_149", 391 | "inputName": [ "add_127", "Upsample_148" ], 392 | "eltType": "kSUM" 393 | }, 394 | { 395 | "layerStyle": "active", 396 | "layerName": "relu_150", 397 | "active_type": "relu", 398 | "inputName": 
"add_149" 399 | }, 400 | { 401 | "layerStyle": "resnet", 402 | "layerName": "add_161", 403 | "inputName": "relu_150", 404 | "left":[ 405 | [ 406 | ["layer5_.0.conv1", 64, 1, 1, 0], 407 | ["layer5_.0.bn1"], 408 | ["relu"] 409 | ], 410 | [ 411 | ["layer5_.0.conv2", 64, 3, 1, 1], 412 | ["layer5_.0.bn2"], 413 | ["relu"] 414 | ], 415 | [ 416 | ["layer5_.0.conv3", 128, 1, 1, 0], 417 | ["layer5_.0.bn3"] 418 | ] 419 | ], 420 | "right":[ 421 | [ 422 | ["layer5_.0.downsample.0", 128, 1, 1, 0], 423 | ["layer5_.0.downsample.1"] 424 | ] 425 | ] 426 | }, 427 | { 428 | "layerStyle": "active", 429 | "layerName": "relu_128", 430 | "active_type": "relu", 431 | "inputName": "add_127" 432 | }, 433 | { 434 | "layerStyle": "cba", 435 | "layerName": "relu_131", 436 | "inputName": "relu_128", 437 | "convFile": "down4.0", 438 | "parameter": { 439 | "input_c": 64, 440 | "output_c": 128, 441 | "kernel": [ 3, 3 ], 442 | "padding": [ 1, 1 ], 443 | "stride": [ 2, 2 ] 444 | }, 445 | "withBN": true, 446 | "bnFile": "down4.1", 447 | "active_type": "relu", 448 | "eps": 1e-5 449 | }, 450 | { 451 | "layerStyle": "cba", 452 | "layerName": "BatchNormalization_133", 453 | "inputName": "relu_131", 454 | "convFile": "down4.3", 455 | "parameter": { 456 | "input_c": 128, 457 | "output_c": 256, 458 | "kernel": [ 3, 3 ], 459 | "padding": [ 1, 1 ], 460 | "stride": [ 2, 2 ] 461 | }, 462 | "withBN": true, 463 | "bnFile": "down4.4" 464 | }, 465 | { 466 | "layerStyle": "eltwise", 467 | "layerName": "add_134", 468 | "inputName": [ "add_113", "BatchNormalization_133" ], 469 | "eltType": "kSUM" 470 | }, 471 | { 472 | "layerStyle": "active", 473 | "layerName": "relu_162", 474 | "active_type": "relu", 475 | "inputName": "add_134" 476 | }, 477 | { 478 | "layerStyle": "resnet", 479 | "layerName": "add_173", 480 | "inputName": "relu_162", 481 | "left":[ 482 | [ 483 | ["layer5.0.conv1", 256, 1, 1, 0], 484 | ["layer5.0.bn1"], 485 | ["relu"] 486 | ], 487 | [ 488 | ["layer5.0.conv2", 256, 3, 2, 1], 489 | ["layer5.0.bn2"], 490 
| ["relu"] 491 | ], 492 | [ 493 | ["layer5.0.conv3", 512, 1, 1, 0], 494 | ["layer5.0.bn3"] 495 | ] 496 | ], 497 | "right":[ 498 | [ 499 | ["layer5.0.downsample.0", 512, 1, 2, 0], 500 | ["layer5.0.downsample.1"] 501 | ] 502 | ] 503 | }, 504 | { 505 | "layerStyle": "bn", 506 | "layerName": "BatchNormalization_180", 507 | "inputName": "add_173", 508 | "weightFile": "spp.scale0.0" 509 | }, 510 | { 511 | "layerStyle": "active", 512 | "layerName": "relu_181", 513 | "active_type": "relu", 514 | "inputName": "BatchNormalization_180" 515 | }, 516 | { 517 | "layerStyle": "conv", 518 | "layerName": "conv_182", 519 | "inputName": "relu_181", 520 | "weightFile": "spp.scale0.2", 521 | "parameter": { 522 | "input_c": 512, 523 | "output_c": 128, 524 | "kernel": [ 1, 1 ], 525 | "padding": [ 0, 0 ], 526 | "stride": [ 1, 1 ] 527 | } 528 | }, 529 | { 530 | "layerStyle": "pool", 531 | "layerName": "AveragePool_184", 532 | "inputName": "add_173", 533 | "parameter": { 534 | "poolType": "kAVG", 535 | "kernel": [ 5, 5 ], 536 | "padding": [ 2, 2 ], 537 | "stride": [ 2, 2 ] 538 | } 539 | }, 540 | { 541 | "layerStyle": "bn", 542 | "layerName": "BatchNormalization_185", 543 | "inputName": "AveragePool_184", 544 | "weightFile": "spp.scale1.1" 545 | }, 546 | { 547 | 548 | "layerStyle": "active", 549 | "layerName": "relu_186", 550 | "active_type": "relu", 551 | "inputName": "BatchNormalization_185" 552 | }, 553 | { 554 | "layerStyle": "conv", 555 | "layerName": "conv_187", 556 | "inputName": "relu_186", 557 | "weightFile": "spp.scale1.3", 558 | "parameter": { 559 | "input_c": 512, 560 | "output_c": 128, 561 | "kernel": [ 1, 1 ], 562 | "padding": [ 0, 0 ], 563 | "stride": [ 1, 1 ] 564 | } 565 | }, 566 | { 567 | "layerStyle": "upsample", 568 | "layerName": "Upsample_198", 569 | "inputName": "conv_187", 570 | "mode": "linear", 571 | "grid" : [8, 16] 572 | }, 573 | { 574 | "layerStyle": "eltwise", 575 | "layerName": "add_199", 576 | "inputName": [ "conv_182", "Upsample_198" ], 577 | "eltType": "kSUM" 
578 | }, 579 | { 580 | "layerStyle": "pool", 581 | "layerName": "AveragePool_204", 582 | "inputName": "add_173", 583 | "parameter": { 584 | "poolType": "kAVG", 585 | "kernel": [ 9, 9 ], 586 | "padding": [ 4, 4 ], 587 | "stride": [ 4, 4 ] 588 | } 589 | }, 590 | { 591 | "layerStyle": "bn", 592 | "layerName": "BatchNormalization_205", 593 | "inputName": "AveragePool_204", 594 | "weightFile": "spp.scale2.1" 595 | }, 596 | { 597 | "layerStyle": "active", 598 | "layerName": "relu_206", 599 | "active_type": "relu", 600 | "inputName": "BatchNormalization_205" 601 | }, 602 | { 603 | "layerStyle": "conv", 604 | "layerName": "conv_207", 605 | "inputName": "relu_206", 606 | "weightFile": "spp.scale2.3", 607 | "parameter": { 608 | "input_c": 512, 609 | "output_c": 128, 610 | "kernel": [ 1, 1 ], 611 | "padding": [ 0, 0 ], 612 | "stride": [ 1, 1 ] 613 | } 614 | }, 615 | { 616 | "layerStyle": "upsample", 617 | "layerName": "Upsample_218", 618 | "inputName": "conv_207", 619 | "mode": "linear", 620 | "grid" : [8, 16] 621 | }, 622 | { 623 | "layerStyle": "pool", 624 | "layerName": "AveragePool_224", 625 | "inputName": "add_173", 626 | "parameter": { 627 | "poolType": "kAVG", 628 | "kernel": [ 17, 17], 629 | "padding": [ 8, 8 ], 630 | "stride": [ 8, 8 ] 631 | } 632 | }, 633 | { 634 | 635 | "layerStyle": "bn", 636 | "layerName": "BatchNormalization_225", 637 | "inputName": "AveragePool_224", 638 | "weightFile": "spp.scale3.1" 639 | }, 640 | { 641 | "layerStyle": "active", 642 | "layerName": "relu_226", 643 | "active_type": "relu", 644 | "inputName": "BatchNormalization_225" 645 | }, 646 | { 647 | "layerStyle": "conv", 648 | "layerName": "conv_227", 649 | "inputName": "relu_226", 650 | "weightFile": "spp.scale3.3", 651 | "parameter": { 652 | "input_c": 512, 653 | "output_c": 128, 654 | "kernel": [ 1, 1 ], 655 | "padding": [ 0, 0 ], 656 | "stride": [ 1, 1 ] 657 | } 658 | }, 659 | { 660 | "layerStyle": "upsample", 661 | "layerName": "Upsample_238", 662 | "inputName": "conv_227", 663 | 
"mode": "linear", 664 | "grid" : [8, 16] 665 | }, 666 | { 667 | "layerStyle": "reduce", 668 | "layerName": "GlobalAveragePool_243", 669 | "inputName": "add_173", 670 | "axes": 6, 671 | "keepD": true 672 | }, 673 | { 674 | "layerStyle": "bn", 675 | "layerName": "BatchNormalization_244", 676 | "inputName": "GlobalAveragePool_243", 677 | "weightFile": "spp.scale4.1" 678 | }, 679 | { 680 | "layerStyle": "active", 681 | "layerName": "relu_245", 682 | "active_type": "relu", 683 | "inputName": "BatchNormalization_244" 684 | }, 685 | { 686 | "layerStyle": "conv", 687 | "layerName": "conv_246", 688 | "inputName": "relu_245", 689 | "weightFile": "spp.scale4.3", 690 | "parameter": { 691 | "input_c": 512, 692 | "output_c": 128, 693 | "kernel": [ 1, 1 ], 694 | "padding": [ 0, 0 ], 695 | "stride": [ 1, 1 ] 696 | } 697 | }, 698 | { 699 | "layerStyle": "upsample", 700 | "layerName": "Upsample_257", 701 | "inputName": "conv_246", 702 | "mode": "linear", 703 | "grid" : [8, 16] 704 | }, 705 | { 706 | "layerStyle": "bn", 707 | "layerName": "BatchNormalization_200", 708 | "inputName": "add_199", 709 | "weightFile": "spp.process1.0" 710 | }, 711 | { 712 | "layerStyle": "active", 713 | "layerName": "relu_201", 714 | "active_type": "relu", 715 | "inputName": "BatchNormalization_200" 716 | }, 717 | { 718 | "layerStyle": "conv", 719 | "layerName": "conv_202", 720 | "inputName": "relu_201", 721 | "weightFile": "spp.process1.2", 722 | "parameter": { 723 | "input_c": 128, 724 | "output_c": 128, 725 | "kernel": [ 3, 3 ], 726 | "padding": [ 1, 1 ], 727 | "stride": [ 1, 1 ] 728 | } 729 | }, 730 | { 731 | "layerStyle": "eltwise", 732 | "layerName": "add_219", 733 | "inputName": [ "conv_202", "Upsample_218" ], 734 | "eltType": "kSUM" 735 | }, 736 | { 737 | "layerStyle": "bn", 738 | "layerName": "BatchNormalization_220", 739 | "inputName": "add_219", 740 | "weightFile": "spp.process2.0" 741 | }, 742 | { 743 | "layerStyle": "active", 744 | "layerName": "relu_221", 745 | "active_type": "relu", 746 | 
"inputName": "BatchNormalization_220" 747 | }, 748 | { 749 | "layerStyle": "conv", 750 | "layerName": "conv_222", 751 | "inputName": "relu_221", 752 | "weightFile": "spp.process2.2", 753 | "parameter": { 754 | "input_c": 128, 755 | "output_c": 128, 756 | "kernel": [ 3, 3 ], 757 | "padding": [ 1, 1 ], 758 | "stride": [ 1, 1 ] 759 | } 760 | }, 761 | { 762 | "layerStyle": "eltwise", 763 | "layerName": "add_239", 764 | "inputName": [ "conv_222", "Upsample_238" ], 765 | "eltType": "kSUM" 766 | }, 767 | { 768 | "layerStyle": "bn", 769 | "layerName": "BatchNormalization_240", 770 | "inputName": "add_239", 771 | "weightFile": "spp.process3.0" 772 | }, 773 | { 774 | "layerStyle": "active", 775 | "layerName": "relu_241", 776 | "active_type": "relu", 777 | "inputName": "BatchNormalization_240" 778 | }, 779 | { 780 | "layerStyle": "conv", 781 | "layerName": "conv_242", 782 | "inputName": "relu_241", 783 | "weightFile": "spp.process3.2", 784 | "parameter": { 785 | "input_c": 128, 786 | "output_c": 128, 787 | "kernel": [ 3, 3 ], 788 | "padding": [ 1, 1 ], 789 | "stride": [ 1, 1 ] 790 | } 791 | }, 792 | { 793 | "layerStyle": "eltwise", 794 | "layerName": "add_258", 795 | "inputName": [ "conv_242", "Upsample_257" ], 796 | "eltType": "kSUM" 797 | }, 798 | { 799 | "layerStyle": "bn", 800 | "layerName": "BatchNormalization_259", 801 | "inputName": "add_258", 802 | "weightFile": "spp.process4.0" 803 | }, 804 | { 805 | "layerStyle": "active", 806 | "layerName": "relu_260", 807 | "active_type": "relu", 808 | "inputName": "BatchNormalization_259" 809 | }, 810 | { 811 | "layerStyle": "conv", 812 | "layerName": "conv_261", 813 | "inputName": "relu_260", 814 | "weightFile": "spp.process4.2", 815 | "parameter": { 816 | "input_c": 128, 817 | "output_c": 128, 818 | "kernel": [ 3, 3 ], 819 | "padding": [ 1, 1 ], 820 | "stride": [ 1, 1 ] 821 | } 822 | }, 823 | { 824 | "layerStyle": "concat", 825 | "layerName": "Concat_262", 826 | "axis": 0, 827 | "inputName": [ "conv_182", "conv_202", 
"conv_222", "conv_242", "conv_261" ] 828 | }, 829 | { 830 | 831 | "layerStyle": "bn", 832 | "layerName": "BatchNormalization_263", 833 | "inputName": "Concat_262", 834 | "weightFile": "spp.compression.0" 835 | }, 836 | { 837 | "layerStyle": "active", 838 | "layerName": "relu_264", 839 | "active_type": "relu", 840 | "inputName": "BatchNormalization_263" 841 | }, 842 | { 843 | "layerStyle": "conv", 844 | "layerName": "conv_265", 845 | "inputName": "relu_264", 846 | "weightFile": "spp.compression.2", 847 | "parameter": { 848 | "input_c": 640, 849 | "output_c": 128, 850 | "kernel": [ 1, 1 ], 851 | "padding": [ 0, 0 ], 852 | "stride": [ 1, 1 ] 853 | } 854 | }, 855 | { 856 | "layerStyle": "bn", 857 | "layerName": "BatchNormalization_266", 858 | "inputName": "add_173", 859 | "weightFile": "spp.shortcut.0" 860 | }, 861 | { 862 | "layerStyle": "active", 863 | "layerName": "relu_267", 864 | "active_type": "relu", 865 | "inputName": "BatchNormalization_266" 866 | }, 867 | { 868 | "layerStyle": "conv", 869 | "layerName": "conv_268", 870 | "inputName": "relu_267", 871 | "weightFile": "spp.shortcut.2", 872 | "parameter": { 873 | "input_c": 512, 874 | "output_c": 128, 875 | "kernel": [ 1, 1 ], 876 | "padding": [ 0, 0 ], 877 | "stride": [ 1, 1 ] 878 | } 879 | }, 880 | { 881 | "layerStyle": "eltwise", 882 | "layerName": "add_269", 883 | "inputName": [ "conv_265", "conv_268" ], 884 | "eltType": "kSUM" 885 | }, 886 | { 887 | "layerStyle": "upsample", 888 | "layerName": "Upsample_280", 889 | "inputName": "add_269", 890 | "mode": "linear", 891 | "grid" : [64, 128] 892 | }, 893 | { 894 | "layerStyle": "eltwise", 895 | "layerName": "add_281", 896 | "inputName": [ "add_161", "Upsample_280" ], 897 | "eltType": "kSUM" 898 | }, 899 | { 900 | "layerStyle": "bn", 901 | "layerName": "BatchNormalization_282", 902 | "inputName": "add_281", 903 | "weightFile": "final_layer.bn1" 904 | }, 905 | { 906 | "layerStyle": "active", 907 | "layerName": "relu_283", 908 | "active_type": "relu", 909 | 
"inputName": "BatchNormalization_282" 910 | }, 911 | { 912 | "layerStyle": "conv", 913 | "layerName": "conv_284", 914 | "inputName": "relu_283", 915 | "weightFile": "final_layer.conv1", 916 | "parameter": { 917 | "input_c": 128, 918 | "output_c": 64, 919 | "kernel": [ 3, 3 ], 920 | "padding": [ 1, 1 ], 921 | "stride": [ 1, 1 ] 922 | } 923 | }, 924 | { 925 | "layerStyle": "bn", 926 | "layerName": "BatchNormalization_285", 927 | "inputName": "conv_284", 928 | "weightFile": "final_layer.bn2" 929 | }, 930 | { 931 | "layerStyle": "active", 932 | "layerName": "relu_286", 933 | "active_type": "relu", 934 | "inputName": "BatchNormalization_285" 935 | }, 936 | { 937 | "layerStyle": "conv", 938 | "layerName": "conv_287", 939 | "inputName": "relu_286", 940 | "weightFile": "final_layer.conv2", 941 | "parameter": { 942 | "input_c": 64, 943 | "output_c": 19, 944 | "kernel": [ 1, 1 ], 945 | "padding": [ 0, 0 ], 946 | "stride": [ 1, 1 ] 947 | } 948 | }, 949 | { 950 | "layerStyle": "upsample", 951 | "layerName": "Upsample_final", 952 | "inputName": "conv_287", 953 | "mode": "linear", 954 | "grid" : [512, 1024] 955 | }, 956 | { 957 | "outputName": "output", 958 | "layerStyle": "topk", 959 | "layerName": "top_max", 960 | "inputName": "Upsample_final", 961 | "TopKOperation": "kMAX", 962 | "k": 1, 963 | "reduceAxes": 1, 964 | "outputIndex": 1 965 | } 966 | ] 967 | 968 | } 969 | -------------------------------------------------------------------------------- /model/deeplabV3_res50.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_c": 3, 3 | "input_h": 640, 4 | "input_w": 640, 5 | "ENGPath": "D:/deeplabV3/yunsheng/20210422/mv.pg", 6 | "weightsDir": "D:/deeplabV3/yunsheng/20210422/wts/", 7 | "fp16": true, 8 | "int8": false, 9 | "div_255": true, 10 | "Mean": [ 0.485, 0.456, 0.406 ], 11 | "Std": [ 0.229, 0.224, 0.225 ], 12 | "inputBlobName": "data", 13 | "outputBlobName": "output", 14 | "maxBatchsize": 10, 15 | "outputSize": 409600, 16 | 
"network": [ 17 | //{ 18 | // "layerStyle": "M_S", 19 | // "layerName": "preInput", 20 | // "inputName": "data", 21 | // "div255": true, 22 | // "Mean": [ 0.485, 0.456, 0.406 ], 23 | // "Std": [ 0.229, 0.224, 0.225 ] 24 | //}, 25 | { 26 | "layerStyle": "cba", 27 | "layerName": "conv_0", 28 | "inputName": "data", 29 | "convFile": "0.0.conv1", 30 | "parameter": { 31 | "input_c": 3, 32 | "output_c": 64, 33 | "kernel": [ 7, 7 ], 34 | "padding": [ 3, 3 ], 35 | "stride": [ 2, 2 ] 36 | }, 37 | "bnFile": "0.0.bn1", 38 | "active_type": "relu" 39 | }, 40 | { 41 | "layerStyle": "pool", 42 | "layerName": "maxpool_3", 43 | "inputName": "conv_0", 44 | "parameter": { 45 | "poolType": "kMAX", 46 | "kernel": [ 3, 3 ], 47 | "padding": [ 1, 1 ], 48 | "stride": [ 2, 2 ] 49 | } 50 | }, 51 | { 52 | "layerStyle": "cba", 53 | "layerName": "conv_4", 54 | "inputName": "maxpool_3", 55 | "convFile": "0.0.layer1.0.conv1", 56 | "parameter": { 57 | "input_c": 64, 58 | "output_c": 64, 59 | "kernel": [ 1, 1 ], 60 | "padding": [ 0, 0 ], 61 | "stride": [ 1, 1 ] 62 | }, 63 | "bnFile": "0.0.layer1.0.bn1", 64 | "active_type": "relu" 65 | }, 66 | { 67 | "layerStyle": "cba", 68 | "layerName": "conv_7", 69 | "inputName": "conv_4", 70 | "convFile": "0.0.layer1.0.conv2", 71 | "parameter": { 72 | "input_c": 64, 73 | "output_c": 64, 74 | "kernel": [ 3, 3 ], 75 | "padding": [ 1, 1 ], 76 | "stride": [ 1, 1 ] 77 | }, 78 | "bnFile": "0.0.layer1.0.bn2", 79 | "active_type": "relu" 80 | }, 81 | { 82 | "layerStyle": "cba", 83 | "layerName": "conv_10", 84 | "inputName": "conv_7", 85 | "convFile": "0.0.layer1.0.conv3", 86 | "parameter": { 87 | "input_c": 64, 88 | "output_c": 256, 89 | "kernel": [ 1, 1 ], 90 | "padding": [ 0, 0 ], 91 | "stride": [ 1, 1 ] 92 | }, 93 | "bnFile": "0.0.layer1.0.bn3" 94 | }, 95 | { 96 | "layerStyle": "cba", 97 | "layerName": "conv_12", 98 | "inputName": "maxpool_3", 99 | "convFile": "0.0.layer1.0.downsample.0", 100 | "parameter": { 101 | "input_c": 64, 102 | "output_c": 256, 103 | "kernel": 
[ 1, 1 ], 104 | "padding": [ 0, 0 ], 105 | "stride": [ 1, 1 ] 106 | }, 107 | "bnFile": "0.0.layer1.0.downsample.1" 108 | }, 109 | { 110 | "layerStyle": "eltwise", 111 | "layerName": "Add_14", 112 | "inputName": [ "conv_10", "conv_12" ], 113 | "eltType": "kSUM" 114 | }, 115 | { 116 | "layerStyle": "active", 117 | "layerName": "relu_15", 118 | "active_type": "relu", 119 | "inputName": "Add_14" 120 | }, 121 | { 122 | "layerStyle": "resnet3", 123 | "layerName": "resnet1_1", 124 | "inputName": "relu_15", 125 | "downsample": false, 126 | "weightsFile": "0.0.layer1.1", 127 | "parameter": { 128 | "input_c": 256, 129 | "temp_c": 64, 130 | "output_c": 256 131 | } 132 | }, 133 | { 134 | "layerStyle": "resnet3", 135 | "layerName": "resnet1_2", 136 | "inputName": "resnet1_1", 137 | "downsample": false, 138 | 139 | "weightsFile": "0.0.layer1.2", 140 | "parameter": { 141 | "input_c": 256, 142 | "temp_c": 64, 143 | "output_c": 256 144 | } 145 | }, //resnet1_end 146 | { 147 | "layerStyle": "resnet3", 148 | "layerName": "resnet2_0", 149 | "inputName": "resnet1_2", 150 | "downsample": true, 151 | "weightsFile": "0.0.layer2.0", 152 | "parameter": { 153 | "input_c": 256, 154 | "temp_c": 128, 155 | "output_c": 512 156 | } 157 | }, 158 | { 159 | "layerStyle": "resnet3", 160 | "layerName": "resnet2_1", 161 | "inputName": "resnet2_0", 162 | "downsample": false, 163 | "weightsFile": "0.0.layer2.1", 164 | "parameter": { 165 | "input_c": 512, 166 | "temp_c": 128, 167 | "output_c": 512 168 | } 169 | }, 170 | { 171 | "layerStyle": "resnet3", 172 | "layerName": "resnet2_2", 173 | "inputName": "resnet2_1", 174 | "downsample": false, 175 | "weightsFile": "0.0.layer2.2", 176 | "parameter": { 177 | "input_c": 512, 178 | "temp_c": 128, 179 | "output_c": 512 180 | } 181 | }, 182 | { 183 | "layerStyle": "resnet3", 184 | "layerName": "resnet2_3", 185 | "inputName": "resnet2_2", 186 | "downsample": false, 187 | 188 | "weightsFile": "0.0.layer2.3", 189 | "parameter": { 190 | "input_c": 512, 191 | 
"temp_c": 128, 192 | "output_c": 512 193 | } 194 | }, //resnet2_end 195 | { 196 | "layerStyle": "resnet3", 197 | "layerName": "resnet3_0", 198 | "inputName": "resnet2_3", 199 | "downsample": true, 200 | "weightsFile": "0.0.layer3.0", 201 | "parameter": { 202 | "input_c": 512, 203 | "temp_c": 256, 204 | "output_c": 1024 205 | } 206 | }, 207 | { 208 | "layerStyle": "resnet3", 209 | "layerName": "resnet3_1", 210 | "inputName": "resnet3_0", 211 | "downsample": false, 212 | "weightsFile": "0.0.layer3.1", 213 | "parameter": { 214 | "input_c": 1024, 215 | "temp_c": 256, 216 | "output_c": 1024 217 | } 218 | }, 219 | { 220 | "layerStyle": "resnet3", 221 | "layerName": "resnet3_2", 222 | "inputName": "resnet3_1", 223 | "downsample": false, 224 | "weightsFile": "0.0.layer3.2", 225 | "parameter": { 226 | "input_c": 1024, 227 | "temp_c": 256, 228 | "output_c": 1024 229 | } 230 | }, 231 | { 232 | "layerStyle": "resnet3", 233 | "layerName": "resnet3_3", 234 | "inputName": "resnet3_2", 235 | "downsample": false, 236 | "weightsFile": "0.0.layer3.3", 237 | "parameter": { 238 | "input_c": 1024, 239 | "temp_c": 256, 240 | "output_c": 1024 241 | } 242 | }, 243 | { 244 | "layerStyle": "resnet3", 245 | "layerName": "resnet3_4", 246 | "inputName": "resnet3_3", 247 | "downsample": false, 248 | "weightsFile": "0.0.layer3.4", 249 | "parameter": { 250 | "input_c": 1024, 251 | "temp_c": 256, 252 | "output_c": 1024 253 | } 254 | }, 255 | { 256 | "layerStyle": "resnet3", 257 | "layerName": "resnet3_5", 258 | "inputName": "resnet3_4", 259 | "downsample": false, 260 | 261 | "weightsFile": "0.0.layer3.5", 262 | "parameter": { 263 | "input_c": 1024, 264 | "temp_c": 256, 265 | "output_c": 1024 266 | } 267 | }, //resnet3_end 268 | { 269 | "layerStyle": "resnet", 270 | "layerName": "resnet4_0", 271 | "inputName": "resnet3_5", 272 | "left": [ 273 | [ 274 | [ "0.0.layer4.0.conv1", 512, 1, 1, 0, 2 ], 275 | [ "0.0.layer4.0.bn1", 1e-5 ], 276 | [ "relu" ] 277 | ], 278 | [ 279 | [ "0.0.layer4.0.conv2", 512, 
3, 1, 2, 2 ], 280 | [ "0.0.layer4.0.bn2", 1e-5 ], 281 | [ "relu" ] 282 | ], 283 | [ 284 | [ "0.0.layer4.0.conv3", 2048, 1, 1, 0, 2 ], 285 | [ "0.0.layer4.0.bn3", 1e-5 ], 286 | [ "relu" ] 287 | ] 288 | ], 289 | "right": [ 290 | [ 291 | [ "0.0.layer4.0.downsample.0", 2048, 1, 1, 0, 2 ], 292 | [ "0.0.layer4.0.downsample.1", 1e-5 ], 293 | [ "relu" ] 294 | ] 295 | ], 296 | "active_type": "relu" 297 | }, 298 | { 299 | "layerStyle": "resnet", 300 | "layerName": "resnet4_1", 301 | "inputName": "resnet4_0", 302 | "left": [ 303 | [ 304 | [ "0.0.layer4.1.conv1", 512, 1, 1, 0 ], 305 | [ "0.0.layer4.1.bn1", 1e-5 ], 306 | [ "relu" ] 307 | ], 308 | [ 309 | [ "0.0.layer4.1.conv2", 512, 3, 1, 4, 4 ], 310 | [ "0.0.layer4.1.bn2", 1e-5 ], 311 | [ "relu" ] 312 | ], 313 | [ 314 | [ "0.0.layer4.1.conv3", 2048, 1, 1, 0 ], 315 | [ "0.0.layer4.1.bn3", 1e-5 ], 316 | [ "relu" ] 317 | ] 318 | ], 319 | "active_type": "relu" 320 | }, 321 | { 322 | "layerStyle": "resnet", 323 | "layerName": "resnet4_2", 324 | "inputName": "resnet4_1", 325 | "left": [ 326 | [ 327 | [ "0.0.layer4.2.conv1", 512, 1, 1, 0 ], 328 | [ "0.0.layer4.2.bn1", 1e-5 ], 329 | [ "relu" ] 330 | ], 331 | [ 332 | [ "0.0.layer4.2.conv2", 512, 3, 1, 8, 8 ], 333 | [ "0.0.layer4.2.bn2", 1e-5 ], 334 | [ "relu" ] 335 | ], 336 | [ 337 | [ "0.0.layer4.2.conv3", 2048, 1, 1, 0 ], 338 | [ "0.0.layer4.2.bn3", 1e-5 ], 339 | [ "relu" ] 340 | ] 341 | ], 342 | "active_type": "relu" 343 | }, //resnet4_end 344 | { 345 | "layerStyle": "cba", 346 | "layerName": "aspp0", 347 | "inputName": "resnet4_2", 348 | 349 | "convFile": "0.1.convs.0.0", 350 | "parameter": { 351 | "input_c": 2048, 352 | "output_c": 256, 353 | "kernel": [ 1, 1 ], 354 | "padding": [ 0, 0 ], 355 | "stride": [ 1, 1 ] 356 | }, 357 | "bnFile": "0.1.convs.0.1", 358 | "active_type": "relu" 359 | }, 360 | { 361 | "layerStyle": "cba", 362 | "layerName": "aspp1", 363 | 364 | "inputName": "resnet4_2", 365 | "convFile": "0.1.convs.1.0", 366 | "parameter": { 367 | "input_c": 2048, 368 | 
"output_c": 256, 369 | "kernel": [ 3, 3 ], 370 | "padding": [ 6, 6 ], 371 | "stride": [ 1, 1 ], 372 | "dilations": [ 6, 6 ] 373 | }, 374 | "bnFile": "0.1.convs.1.1", 375 | "active_type": "relu" 376 | }, 377 | { 378 | "layerStyle": "cba", 379 | "layerName": "aspp2", 380 | 381 | "inputName": "resnet4_2", 382 | "convFile": "0.1.convs.2.0", 383 | "parameter": { 384 | "input_c": 2048, 385 | "output_c": 256, 386 | "kernel": [ 3, 3 ], 387 | "padding": [ 12, 12 ], 388 | "stride": [ 1, 1 ], 389 | "dilations": [ 12, 12 ] 390 | }, 391 | "bnFile": "0.1.convs.2.1", 392 | "active_type": "relu" 393 | }, 394 | { 395 | "layerStyle": "cba", 396 | "layerName": "aspp3", 397 | 398 | "inputName": "resnet4_2", 399 | "convFile": "0.1.convs.3.0", 400 | "parameter": { 401 | "input_c": 2048, 402 | "output_c": 256, 403 | "kernel": [ 3, 3 ], 404 | "padding": [ 18, 18 ], 405 | "stride": [ 1, 1 ], 406 | "dilations": [ 18, 18 ] 407 | }, 408 | "bnFile": "0.1.convs.3.1", 409 | "active_type": "relu" 410 | }, 411 | { 412 | "layerStyle": "reduce", 413 | "layerName": "avgPool", 414 | "inputName": "resnet4_2", 415 | "keepD": true, 416 | "axes": 6 417 | }, 418 | { 419 | "layerStyle": "cba", 420 | "layerName": "aspp4", 421 | "inputName": "avgPool", 422 | "convFile": "0.1.convs.4.1", 423 | "parameter": { 424 | "input_c": 2048, 425 | "output_c": 256, 426 | "kernel": [ 1, 1 ], 427 | "padding": [ 0, 0 ], 428 | "stride": [ 1, 1 ] 429 | }, 430 | "bnFile": "0.1.convs.4.2", 431 | "active_type": "relu" 432 | }, 433 | { 434 | "layerStyle": "upsample", 435 | "layerName": "aspp4_up", 436 | "inputName": "aspp4", 437 | "mode": "bilinear", 438 | "grid": [ 40 ] 439 | }, 440 | { 441 | "layerStyle": "concat", 442 | "layerName": "aspp_cat", 443 | "axis": 0, 444 | "inputName": [ "aspp0", "aspp1", "aspp2", "aspp3", "aspp4_up" ] 445 | }, 446 | { 447 | "layerStyle": "cba", 448 | "layerName": "cls_aspp", 449 | "inputName": "aspp_cat", 450 | 451 | "convFile": "0.1.project.0", 452 | "parameter": { 453 | "input_c": 1280, 454 | 
"output_c": 256, 455 | "kernel": [ 1, 1 ], 456 | "padding": [ 0, 0 ], 457 | "stride": [ 1, 1 ] 458 | }, 459 | "bnFile": "0.1.project.1", 460 | "active_type": "relu" 461 | }, 462 | { 463 | "layerStyle": "upsample", 464 | "layerName": "cls_aspp_up", 465 | 466 | "inputName": "cls_aspp", 467 | "mode": "bilinear", 468 | "grid": [ 160 ] 469 | }, 470 | { 471 | "layerStyle": "cba", 472 | "layerName": "res_aspp", 473 | 474 | "inputName": "resnet1_2", 475 | "convFile": "1.neck.0.lateral_block.conv", 476 | "parameter": { 477 | "input_c": 256, 478 | "output_c": 48, 479 | "kernel": [ 1, 1 ], 480 | "padding": [ 0, 0 ], 481 | "stride": [ 1, 1 ] 482 | }, 483 | "bnFile": "1.neck.0.lateral_block.bn", 484 | "active_type": "relu" 485 | }, 486 | { 487 | "layerStyle": "concat", 488 | "layerName": "project_cat", 489 | 490 | "axis": 0, 491 | "inputName": [ "cls_aspp_up", "res_aspp" ] 492 | }, 493 | { 494 | "layerStyle": "cba", 495 | "layerName": "class_1", 496 | "inputName": "project_cat", 497 | 498 | "convFile": "2.block.0.block.0.conv", 499 | "parameter": { 500 | "input_c": 304, 501 | "output_c": 256, 502 | "kernel": [ 3, 3 ], 503 | "padding": [ 1, 1 ], 504 | "stride": [ 1, 1 ] 505 | }, 506 | "bnFile": "2.block.0.block.0.bn", 507 | "active_type": "relu" 508 | }, 509 | { 510 | "layerStyle": "cba", 511 | "layerName": "class_2", 512 | "inputName": "class_1", 513 | 514 | "convFile": "2.block.0.block.1.conv", 515 | "parameter": { 516 | "input_c": 256, 517 | "output_c": 256, 518 | "kernel": [ 3, 3 ], 519 | "padding": [ 1, 1 ], 520 | "stride": [ 1, 1 ] 521 | }, 522 | "bnFile": "2.block.0.block.1.bn", 523 | "active_type": "relu" 524 | }, 525 | { 526 | "layerStyle": "conv", 527 | "layerName": "class", 528 | "inputName": "class_2", 529 | "weightFile": "2.block.1", 530 | "biasFile": "2.block.1", 531 | 532 | "parameter": { 533 | "input_c": 256, 534 | "output_c": 15, 535 | "kernel": [ 1, 1 ], 536 | "padding": [ 0, 0 ], 537 | "stride": [ 1, 1 ] 538 | } 539 | }, 540 | { 541 | "layerStyle": "upsample", 
542 | "layerName": "cls_up", 543 | "inputName": "class", 544 | "mode": "linear", 545 | 546 | "grid": [ 640 ] 547 | }, 548 | { 549 | "layerStyle": "softmax", 550 | "layerName": "soft_max", 551 | "inputName": "cls_up", 552 | 553 | "axes": 1 554 | }, 555 | { 556 | "layerStyle": "topk", 557 | "layerName": "top_max", 558 | "inputName": "soft_max", 559 | "outputName": "output", 560 | "TopKOperation": "kMAX", 561 | "k": 1, 562 | "reduceAxes": 1, 563 | "outputIndex": 1 564 | } 565 | ] 566 | } 567 | -------------------------------------------------------------------------------- /model/resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_c": 3, 3 | "input_h": 256, 4 | "input_w": 256, 5 | "ENGPath": "H:/myGitHub/tensorrtF/model/resnet50/res50.eng", 6 | "weightsDir": "H:/myGitHub/tensorrtF/model/resnet50/wts/", 7 | "onnxPath": "H:/myGitHub/tensorrtF/model/resnet50/res50.onnx", 8 | "fp16": true, 9 | //"int8": true, 10 | //"cali_txt": "H:/myGitHub/tensorrtF/model/resnet50/cali.txt", 11 | //"cali_table": "H:/myGitHub/tensorrtF/model/resnet50/cali.table", 12 | //"div_255": true, 13 | //"Mean": [ 0.485, 0.456, 0.406 ], 14 | //"Std": [ 0.229, 0.224, 0.225 ], 15 | "inputBlobName": "data", 16 | "outputBlobName": "output", 17 | "maxBatchsize": 10, 18 | "outputSize": 1000, 19 | "network": [ 20 | { 21 | "layerStyle": "cba", 22 | "layerName": "conv_0", 23 | "inputName": "data", 24 | "convFile": "conv1", 25 | "parameter": { 26 | "output_c": 64, 27 | "kernel": [ 7 ], 28 | "padding": [ 3 ], 29 | "stride": [ 2 ] 30 | }, 31 | "bnFile": "bn1", 32 | "active_type": "relu" 33 | }, 34 | { 35 | "layerStyle": "pool", 36 | "layerName": "maxpool_3", 37 | "inputName": "conv_0", 38 | "parameter": { 39 | "poolType": "kMAX", 40 | "kernel": [ 3, 3 ], 41 | "padding": [ 1, 1 ], 42 | "stride": [ 2, 2 ] 43 | } 44 | }, 45 | { 46 | "layerStyle": "cba", 47 | "layerName": "conv_4", 48 | "inputName": "maxpool_3", 49 | "convFile": "layer1.0.conv1", 50 | 
"parameter": { 51 | "output_c": 64, 52 | "kernel": [ 1, 1 ], 53 | "padding": [ 0, 0 ], 54 | "stride": [ 1, 1 ] 55 | }, 56 | "bnFile": "layer1.0.bn1", 57 | "active_type": "relu" 58 | }, 59 | { 60 | "layerStyle": "cba", 61 | "layerName": "conv_7", 62 | "inputName": "conv_4", 63 | "convFile": "layer1.0.conv2", 64 | "parameter": { 65 | "output_c": 64, 66 | "kernel": [ 3, 3 ], 67 | "padding": [ 1, 1 ], 68 | "stride": [ 1, 1 ] 69 | }, 70 | "bnFile": "layer1.0.bn2", 71 | "active_type": "relu" 72 | }, 73 | { 74 | "layerStyle": "cba", 75 | "layerName": "conv_10", 76 | "inputName": "conv_7", 77 | "convFile": "layer1.0.conv3", 78 | "parameter": { 79 | "input_c": 64, 80 | "output_c": 256, 81 | "kernel": [ 1, 1 ], 82 | "padding": [ 0, 0 ], 83 | "stride": [ 1, 1 ] 84 | }, 85 | "bnFile": "layer1.0.bn3" 86 | }, 87 | { 88 | "layerStyle": "cba", 89 | "layerName": "conv_12", 90 | "inputName": "maxpool_3", 91 | "convFile": "layer1.0.downsample.0", 92 | "parameter": { 93 | "input_c": 64, 94 | "output_c": 256, 95 | "kernel": [ 1, 1 ], 96 | "padding": [ 0, 0 ], 97 | "stride": [ 1, 1 ] 98 | }, 99 | "bnFile": "layer1.0.downsample.1" 100 | }, 101 | { 102 | "layerStyle": "eltwise", 103 | "layerName": "Add_14", 104 | "inputName": [ "conv_10", "conv_12" ], 105 | "eltType": "kSUM" 106 | }, 107 | { 108 | "layerStyle": "active", 109 | "layerName": "relu_15", 110 | "active_type": "relu", 111 | "inputName": "Add_14" 112 | }, 113 | { 114 | "layerStyle": "resnet3", 115 | "layerName": "resnet1_1", 116 | "inputName": "relu_15", 117 | "downsample": false, 118 | "weightsFile": "layer1.1", 119 | "parameter": { 120 | "input_c": 256, 121 | "temp_c": 64, 122 | "output_c": 256 123 | } 124 | }, 125 | { 126 | "layerStyle": "resnet3", 127 | "layerName": "resnet1_2", 128 | "inputName": "resnet1_1", 129 | "downsample": false, 130 | 131 | "weightsFile": "layer1.2", 132 | "parameter": { 133 | "input_c": 256, 134 | "temp_c": 64, 135 | "output_c": 256 136 | } 137 | }, //resnet1_end 138 | { 139 | "layerStyle": 
"resnet3", 140 | "layerName": "resnet2_0", 141 | "inputName": "resnet1_2", 142 | "downsample": true, 143 | "weightsFile": "layer2.0", 144 | "parameter": { 145 | "input_c": 256, 146 | "temp_c": 128, 147 | "output_c": 512 148 | } 149 | }, 150 | { 151 | "layerStyle": "resnet3", 152 | "layerName": "resnet2_1", 153 | "inputName": "resnet2_0", 154 | "downsample": false, 155 | "weightsFile": "layer2.1", 156 | "parameter": { 157 | "input_c": 512, 158 | "temp_c": 128, 159 | "output_c": 512 160 | } 161 | }, 162 | { 163 | "layerStyle": "resnet3", 164 | "layerName": "resnet2_2", 165 | "inputName": "resnet2_1", 166 | "downsample": false, 167 | "weightsFile": "layer2.2", 168 | "parameter": { 169 | "input_c": 512, 170 | "temp_c": 128, 171 | "output_c": 512 172 | } 173 | }, 174 | { 175 | "layerStyle": "resnet3", 176 | "layerName": "resnet2_3", 177 | "inputName": "resnet2_2", 178 | "downsample": false, 179 | 180 | "weightsFile": "layer2.3", 181 | "parameter": { 182 | "input_c": 512, 183 | "temp_c": 128, 184 | "output_c": 512 185 | } 186 | }, //resnet2_end 187 | { 188 | "layerStyle": "resnet3", 189 | "layerName": "resnet3_0", 190 | "inputName": "resnet2_3", 191 | "downsample": true, 192 | "weightsFile": "layer3.0", 193 | "parameter": { 194 | "input_c": 512, 195 | "temp_c": 256, 196 | "output_c": 1024 197 | } 198 | }, 199 | { 200 | "layerStyle": "resnet3", 201 | "layerName": "resnet3_1", 202 | "inputName": "resnet3_0", 203 | "downsample": false, 204 | "weightsFile": "layer3.1", 205 | "parameter": { 206 | "input_c": 1024, 207 | "temp_c": 256, 208 | "output_c": 1024 209 | } 210 | }, 211 | { 212 | "layerStyle": "resnet3", 213 | "layerName": "resnet3_2", 214 | "inputName": "resnet3_1", 215 | "downsample": false, 216 | "weightsFile": "layer3.2", 217 | "parameter": { 218 | "input_c": 1024, 219 | "temp_c": 256, 220 | "output_c": 1024 221 | } 222 | }, 223 | { 224 | "layerStyle": "resnet3", 225 | "layerName": "resnet3_3", 226 | "inputName": "resnet3_2", 227 | "downsample": false, 228 | 
"weightsFile": "layer3.3", 229 | "parameter": { 230 | "input_c": 1024, 231 | "temp_c": 256, 232 | "output_c": 1024 233 | } 234 | }, 235 | { 236 | "layerStyle": "resnet3", 237 | "layerName": "resnet3_4", 238 | "inputName": "resnet3_3", 239 | "downsample": false, 240 | "weightsFile": "layer3.4", 241 | "parameter": { 242 | "input_c": 1024, 243 | "temp_c": 256, 244 | "output_c": 1024 245 | } 246 | }, 247 | { 248 | "layerStyle": "resnet3", 249 | "layerName": "resnet3_5", 250 | "inputName": "resnet3_4", 251 | "downsample": false, 252 | 253 | "weightsFile": "layer3.5", 254 | "parameter": { 255 | "input_c": 1024, 256 | "temp_c": 256, 257 | "output_c": 1024 258 | } 259 | }, //resnet3_end 260 | { 261 | "layerStyle": "resnet3", 262 | "layerName": "resnet4_0", 263 | "inputName": "resnet3_5", 264 | "downsample": true, 265 | "weightsFile": "layer4.0", 266 | "parameter": { 267 | "input_c": 1024, 268 | "temp_c": 512, 269 | "output_c": 2048 270 | } 271 | }, 272 | { 273 | "layerStyle": "resnet3", 274 | "layerName": "resnet4_1", 275 | "inputName": "resnet4_0", 276 | "downsample": false, 277 | "weightsFile": "layer4.1", 278 | "parameter": { 279 | "input_c": 1024, 280 | "temp_c": 512, 281 | "output_c": 2048 282 | } 283 | }, 284 | { 285 | "layerStyle": "resnet3", 286 | "layerName": "resnet4_2", 287 | "inputName": "resnet4_1", 288 | "downsample": false, 289 | "weightsFile": "layer4.2", 290 | "parameter": { 291 | "input_c": 1024, 292 | "temp_c": 512, 293 | "output_c": 2048 294 | } 295 | }, 296 | { 297 | "layerStyle": "reduce", 298 | "layerName": "avgPool_172", 299 | "inputName": "resnet4_2", 300 | "type": "kAVG", 301 | 302 | "axes": 6, 303 | "keepD": true 304 | }, 305 | { 306 | "layerStyle": "fc", 307 | "layerName": "gemm_173", 308 | "inputName": "avgPool_172", 309 | "outputName": "output", 310 | "weightFile": "fc.weight", 311 | "biasFile": "fc.bias", 312 | "parameter": { 313 | "input_c": 2048, 314 | "output_c": 1000 315 | } 316 | } 317 | 318 | ] 319 | 320 | } 321 | 
-------------------------------------------------------------------------------- /model/yolo/yolov5s.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_c": 3, 3 | "input_h": 640, 4 | "input_w": 640, 5 | "createENG": true, 6 | "ENGPath": "/mnt/f/LearningCode/LinuxCode/yolov5-4.0/yolov5.eng", 7 | "weightsDir": "/mnt/f/LearningCode/LinuxCode/yolov5-4.0/wts/", 8 | "fp16": true, 9 | "inputBlobName": "data", 10 | "outputBlobName": "prob", 11 | "outputSize": 1801, 12 | "maxBatchsize": 10, 13 | "doInfer": true, 14 | "BatchSize": 3, // [1,maxBatchsize] 15 | "imgDir": "/mnt/f/dataset/oneimg/", 16 | "imgType": "jpg", 17 | "network": [ 18 | { 19 | "layerStyle": "preInput", 20 | "layerName": "m_s", 21 | "inputName": "data", 22 | "div_255": true 23 | }, 24 | { 25 | "layerStyle": "focus", 26 | "layerName": "focus_0", 27 | "inputName": "m_s" 28 | }, 29 | { 30 | "layerStyle": "cba", 31 | "layerName": "conv_1", 32 | "inputName": "focus_0", 33 | 34 | "convFile": "model.0.conv.conv", 35 | "parameter": { 36 | "output_c": 32, 37 | "kernel": [ 3, 3 ], 38 | "padding": [ 1, 1 ], 39 | "stride": [ 1, 1 ] 40 | }, 41 | "bnFile": "model.0.conv.bn", 42 | "active_type": "silu", 43 | "eps": 1e-3 44 | }, 45 | { 46 | "layerStyle": "cba", 47 | "layerName": "conv_2", 48 | "inputName": "conv_1", 49 | 50 | "convFile": "model.1.conv", 51 | "parameter": { 52 | "output_c": 64, 53 | "kernel": [ 3, 3 ], 54 | "padding": [ 1, 1 ], 55 | "stride": [ 2, 2 ] 56 | }, 57 | "bnFile": "model.1.bn", 58 | "active_type": "silu", 59 | "eps": 1e-3 60 | }, 61 | { 62 | "layerStyle": "C3", 63 | "layerName": "bottleneck_CSP2", 64 | "inputName": "conv_2", 65 | "c1": 64, 66 | "c2": 64, 67 | "n": 1, 68 | "shortCut": true, 69 | "g": 1, 70 | "e": 0.5, 71 | "lname": "model.2", 72 | "active_type": "silu", 73 | "eps": 1e-3 74 | }, 75 | { 76 | "layerStyle": "cba", 77 | "layerName": "conv_3", 78 | "inputName": "bottleneck_CSP2", 79 | "convFile": "model.3.conv", 80 | "parameter": { 81 
| "output_c": 128, 82 | "kernel": [ 3, 3 ], 83 | "padding": [ 1, 1 ], 84 | "stride": [ 2, 2 ] 85 | }, 86 | "bnFile": "model.3.bn", 87 | "active_type": "silu", 88 | "eps": 1e-3 89 | }, 90 | { 91 | "layerStyle": "C3", 92 | "layerName": "bottleneck_CSP4", 93 | "inputName": "conv_3", 94 | "c1": 128, 95 | "c2": 128, 96 | "n": 3, 97 | "shortCut": true, 98 | "g": 1, 99 | "e": 0.5, 100 | "lname": "model.4", 101 | "active_type": "silu", 102 | "eps": 1e-3 103 | }, 104 | { 105 | "layerStyle": "cba", 106 | "layerName": "conv_5", 107 | "inputName": "bottleneck_CSP4", 108 | "convFile": "model.5.conv", 109 | "parameter": { 110 | "output_c": 256, 111 | "kernel": [ 3, 3 ], 112 | "padding": [ 1, 1 ], 113 | "stride": [ 2, 2 ] 114 | }, 115 | "bnFile": "model.5.bn", 116 | "active_type": "silu", 117 | "eps": 1e-3 118 | }, 119 | { 120 | "layerStyle": "C3", 121 | "layerName": "bottleneck_CSP6", 122 | "inputName": "conv_5", 123 | "c1": 256, 124 | "c2": 256, 125 | "n": 3, 126 | "shortCut": true, 127 | "e": 0.5, 128 | "lname": "model.6", 129 | "active_type": "silu", 130 | "eps": 1e-3 131 | }, 132 | { 133 | "layerStyle": "cba", 134 | "layerName": "conv_7", 135 | "inputName": "bottleneck_CSP6", 136 | "convFile": "model.7.conv", 137 | "parameter": { 138 | "output_c": 512, 139 | "kernel": [ 3, 3 ], 140 | "padding": [ 1, 1 ], 141 | "stride": [ 2, 2 ] 142 | }, 143 | "bnFile": "model.7.bn", 144 | "active_type": "silu", 145 | "eps": 1e-3 146 | }, 147 | { 148 | "layerStyle": "spp", 149 | "layerName": "spp_8", 150 | "inputName": "conv_7", 151 | "lname": "model.8", 152 | "c1": 512, 153 | "c2": 512, 154 | "kernels": [ 5, 9, 13 ], 155 | "eps": 1e-3, 156 | "active_type": "silu" 157 | }, 158 | { 159 | "layerStyle": "C3", 160 | "layerName": "bottleneck_CSP9", 161 | "inputName": "spp_8", 162 | "c1": 512, 163 | "c2": 512, 164 | "n": 1, 165 | "shortCut": false, 166 | "g": 1, 167 | "e": 0.5, 168 | "lname": "model.9", 169 | "active_type": "silu", 170 | "eps": 1e-3 171 | }, 172 | { 173 | "layerStyle": "cba", 174 
| "layerName": "conv_10", 175 | "inputName": "bottleneck_CSP9", 176 | 177 | "convFile": "model.10.conv", 178 | "parameter": { 179 | "output_c": 256, 180 | "kernel": [ 1 ], 181 | "padding": [ 0 ], 182 | "stride": [ 1 ] 183 | }, 184 | "bnFile": "model.10.bn", 185 | "active_type": "silu", 186 | "eps": 1e-3 187 | }, 188 | { 189 | "layerStyle": "upsample", 190 | "layerName": "upsample_11", 191 | 192 | "mode": "nearest", 193 | "inputName": "conv_10" 194 | }, 195 | { 196 | "layerStyle": "concat", 197 | "layerName": "concat_12", 198 | "inputName": [ "upsample_11", "bottleneck_CSP6" ], 199 | "axis": 0 200 | }, 201 | { 202 | "layerStyle": "C3", 203 | "layerName": "bottleneck_CSP13", 204 | "inputName": "concat_12", 205 | "c1": 512, 206 | "c2": 256, 207 | "n": 1, 208 | "shortCut": false, 209 | "g": 1, 210 | "e": 0.5, 211 | "lname": "model.13", 212 | "active_type": "silu", 213 | "eps": 1e-3 214 | }, 215 | { 216 | "layerStyle": "cba", 217 | "layerName": "conv_14", 218 | "inputName": "bottleneck_CSP13", 219 | "convFile": "model.14.conv", 220 | "parameter": { 221 | "output_c": 128, 222 | "kernel": [ 1 ], 223 | "padding": [ 0 ], 224 | "stride": [ 1 ] 225 | }, 226 | "bnFile": "model.14.bn", 227 | "active_type": "silu", 228 | "eps": 1e-3 229 | }, 230 | { 231 | "layerStyle": "upsample", 232 | "layerName": "upsample_15", 233 | "mode": "nearest", 234 | "inputName": "conv_14" 235 | }, 236 | { 237 | "layerStyle": "concat", 238 | "layerName": "concat_16", 239 | "inputName": [ "upsample_15", "bottleneck_CSP4" ], 240 | "axis": 0 241 | }, 242 | { 243 | "layerStyle": "C3", 244 | "layerName": "bottleneck_CSP17", 245 | "inputName": "concat_16", 246 | "c1": 256, 247 | "c2": 128, 248 | "n": 1, 249 | "shortCut": false, 250 | "g": 1, 251 | "e": 0.5, 252 | "lname": "model.17", 253 | "active_type": "silu", 254 | "eps": 1e-3 255 | }, 256 | { 257 | "layerStyle": "cba", 258 | "layerName": "conv_18", 259 | "inputName": "bottleneck_CSP17", 260 | "convFile": "model.18.conv", 261 | "parameter": { 262 | 
"output_c": 128, 263 | "kernel": [ 3 ], 264 | "padding": [ 1 ], 265 | "stride": [ 2 ] 266 | }, 267 | "bnFile": "model.18.bn", 268 | "active_type": "silu", 269 | "eps": 1e-3 270 | }, 271 | { 272 | "layerStyle": "concat", 273 | "layerName": "concat_19", 274 | "inputName": [ "conv_18", "conv_14" ], 275 | "axis": 0 276 | }, 277 | { 278 | "layerStyle": "C3", 279 | "layerName": "bottleneck_CSP20", 280 | "inputName": "concat_19", 281 | "c1": 256, 282 | "c2": 256, 283 | "n": 1, 284 | "shortCut": false, 285 | "g": 1, 286 | "e": 0.5, 287 | "lname": "model.20", 288 | "active_type": "silu", 289 | "eps": 1e-3 290 | }, 291 | { 292 | "layerStyle": "cba", 293 | "layerName": "conv_21", 294 | "inputName": "bottleneck_CSP20", 295 | "convFile": "model.21.conv", 296 | "parameter": { 297 | "output_c": 256, 298 | "kernel": [ 3 ], 299 | "padding": [ 1 ], 300 | "stride": [ 2 ] 301 | }, 302 | "bnFile": "model.21.bn", 303 | "active_type": "silu", 304 | "eps": 1e-3 305 | }, 306 | { 307 | "layerStyle": "concat", 308 | "layerName": "concat_22", 309 | "inputName": [ "conv_21", "conv_10" ], 310 | "axis": 0 311 | }, 312 | { 313 | "layerStyle": "C3", 314 | "layerName": "bottleneck_CSP23", 315 | "inputName": "concat_22", 316 | "c1": 512, 317 | "c2": 512, 318 | "n": 1, 319 | "shortCut": false, 320 | "g": 1, 321 | "e": 0.5, 322 | "lname": "model.23", 323 | "active_type": "silu", 324 | "eps": 1e-3 325 | }, 326 | { 327 | "layerStyle": "conv", 328 | "layerName": "det_0", 329 | "inputName": "bottleneck_CSP17", 330 | "weightFile": "model.24.m.0", 331 | "biasFile": "model.24.m.0", 332 | 333 | "parameter": { 334 | "output_c": 255, // output_c = 3 * (cls_num + 5) 335 | "kernel": [ 1 ] 336 | } 337 | }, 338 | { 339 | "layerStyle": "conv", 340 | "layerName": "det_1", 341 | "inputName": "bottleneck_CSP20", 342 | "weightFile": "model.24.m.1", 343 | "biasFile": "model.24.m.1", 344 | 345 | "parameter": { 346 | "output_c": 255, // output_c = 3 * (cls_num + 5) 347 | "kernel": [ 1 ] 348 | } 349 | }, 350 | { 351 | 
"layerStyle": "conv", 352 | "layerName": "det_2", 353 | "inputName": "bottleneck_CSP23", 354 | "weightFile": "model.24.m.2", 355 | "biasFile": "model.24.m.2", 356 | 357 | "parameter": { 358 | "output_c": 255, // output_c = 3 * (cls_num + 5) 359 | "kernel": [ 1 ] 360 | } 361 | }, 362 | { 363 | "layerStyle": "yolo", 364 | "layerName": "YOLO", 365 | "outputName": "prob", 366 | "inputName": [ "det_0", "det_1", "det_2" ], 367 | "anchor_grid": "model.24.anchor_grid", 368 | //"Anchor": [ 10 ,13 ,16 ,30 ,33 ,23 ,30 ,61 ,62 ,45 ,59 ,119 ,116 ,90 ,156 ,198 ,373 ,326 ], 369 | "cls_num": 80, 370 | "max_box": 300 //outputSize = 300 * (4 + 1 + 1) +1 =1801 ,4:(x1,y1,x2,y2); 1(conf) ;1 (cls) 371 | } 372 | ] 373 | 374 | } 375 | -------------------------------------------------------------------------------- /src/CMakeLists.txt1: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | 3 | file(GLOB TRT_FILES "*.h" "*.cpp" "./plugin/*.cu" "./plugin/*.h" "./plugin/*.cpp") 4 | -------------------------------------------------------------------------------- /src/calibrator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "calibrator.h" 3 | #include "utils.h" 4 | 5 | 6 | vector loadImages(const string imgTxt) 7 | { 8 | vector imgInfo; 9 | FILE *f = fopen(imgTxt.c_str(),"r"); 10 | if (!f){ 11 | perror("Error"); 12 | cout<<"cant open file"< Mean, 44 | vector Std, 45 | bool isDiv255):m_batchsize(batchsize), 46 | m_inputC(inputC), 47 | m_inputH(inputH), 48 | m_inputW(inputW), 49 | m_InputCount(batchsize * inputC * inputH * inputW), 50 | m_inputName(inputName.c_str()), 51 | m_calibratorPath(calibratorPath), 52 | m_ImageIndex(0) 53 | { 54 | m_ImageList = loadImages(caliTxt); 55 | m_mean = Mean; 56 | m_std = Std; 57 | 58 | div_255 = isDiv255; 59 | 60 | cudaMalloc(&m_CudaInput,m_InputCount*sizeof (float)); 61 | 62 | } 63 | int calibrator::getBatchSize() const 64 | { 
65 | return m_batchsize; 66 | } 67 | bool calibrator::getBatch(void **bindings, const char **names, int nbBindings) 68 | { 69 | 70 | if(m_ImageIndex + m_batchsize > m_ImageList.size()){ 71 | return false; 72 | } 73 | int flag = 0; 74 | cv::Scalar Mean; 75 | cv::Scalar Std; 76 | 77 | if(m_inputC == 3) 78 | { 79 | flag = 1; 80 | Mean = cv::Scalar(m_mean[0], m_mean[1], m_mean[2]); 81 | Std = cv::Scalar(m_std[0], m_std[1], m_std[2]); 82 | } 83 | else if(m_inputC == 1){ 84 | Mean = cv::Scalar(m_mean[0]); 85 | Std = cv::Scalar(m_std[0]); 86 | } 87 | else { 88 | cout<<"not support "< InputImgs; 91 | for (unsigned int i = m_ImageIndex; i < m_ImageIndex + m_batchsize;i++) { 92 | string imgPath = m_ImageList.at(i); 93 | cout<(0),m_InputCount*sizeof (float),cudaMemcpyHostToDevice); 114 | 115 | bindings[0] = m_CudaInput; 116 | return true; 117 | } 118 | const void *calibrator::readCalibrationCache(size_t &length) 119 | { 120 | void *output; 121 | m_CalibrationCache.clear(); 122 | ifstream input(m_calibratorPath,ios::binary); 123 | input >> noskipws; 124 | if (input.good()) 125 | { 126 | copy(istream_iterator(input),istream_iterator(),back_inserter(m_CalibrationCache)); 127 | } 128 | length = m_CalibrationCache.size(); 129 | 130 | if(length){ 131 | std::cout << "Using cached calibration table to build the engine " << std::endl; 132 | output = &m_CalibrationCache[0]; 133 | } 134 | else { 135 | std::cout << "New calibration table will be created to build the engine" << std::endl; 136 | output = nullptr; 137 | } 138 | 139 | return output; 140 | } 141 | void calibrator::writeCalibrationCache(const void *ptr, std::size_t length) 142 | { 143 | assert(!m_calibratorPath.empty()); 144 | cout<<"length = "<(ptr),length); 147 | output.close(); 148 | } 149 | -------------------------------------------------------------------------------- /src/calibrator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef CALIBRATOR_H 3 | #define 
CALIBRATOR_H 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace nvinfer1; 14 | 15 | class calibrator : public nvinfer1::IInt8EntropyCalibrator2 16 | { 17 | public: 18 | calibrator(const unsigned int &batchsize, 19 | const string &caliTxt, 20 | const string &calibratorPath, 21 | const unsigned int &inputC, 22 | const unsigned int &inputH, 23 | const unsigned int &inputW, 24 | const string &inputName, 25 | vector Mean, 26 | vector Std, 27 | bool isDiv255); 28 | 29 | int getBatchSize() const override; 30 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override; 31 | const void* readCalibrationCache(size_t& length) override; 32 | void writeCalibrationCache(const void* ptr, std::size_t length) override; 33 | 34 | private: 35 | unsigned int m_batchsize; 36 | const unsigned int m_inputC; 37 | const unsigned int m_inputH; 38 | const unsigned int m_inputW; 39 | vector m_mean; 40 | vector m_std; 41 | //const uint64_t m_inputSize; 42 | const uint64_t m_InputCount; 43 | const char *m_inputName; 44 | const string m_calibratorPath{nullptr}; 45 | vector m_ImageList; 46 | void *m_CudaInput{nullptr}; 47 | vector m_CalibrationCache; 48 | unsigned int m_ImageIndex; 49 | bool div_255; 50 | 51 | }; 52 | 53 | #endif // CALIBRATOR_H 54 | -------------------------------------------------------------------------------- /src/json-forwards.h: -------------------------------------------------------------------------------- 1 | /// Json-cpp amalgamated forward header (http://jsoncpp.sourceforge.net/). 2 | /// It is intended to be used with #include "json/json-forwards.h" 3 | /// This header provides forward declaration for all JsonCpp types. 
4 | 5 | // ////////////////////////////////////////////////////////////////////// 6 | // Beginning of content of file: LICENSE 7 | // ////////////////////////////////////////////////////////////////////// 8 | 9 | /* 10 | The JsonCpp library's source code, including accompanying documentation, 11 | tests and demonstration applications, are licensed under the following 12 | conditions... 13 | 14 | Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all 15 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 16 | this software is released into the Public Domain. 17 | 18 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of 19 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and 20 | The JsonCpp Authors, and is released under the terms of the MIT License (see below). 21 | 22 | In jurisdictions which recognize Public Domain property, the user of this 23 | software may choose to accept it either as 1) Public Domain, 2) under the 24 | conditions of the MIT License (see below), or 3) under the terms of dual 25 | Public Domain/MIT License conditions described here, as they choose. 
26 | 27 | The MIT License is about as close to Public Domain as a license can get, and is 28 | described in clear, concise terms at: 29 | 30 | http://en.wikipedia.org/wiki/MIT_License 31 | 32 | The full text of the MIT License follows: 33 | 34 | ======================================================================== 35 | Copyright (c) 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 36 | 37 | Permission is hereby granted, free of charge, to any person 38 | obtaining a copy of this software and associated documentation 39 | files (the "Software"), to deal in the Software without 40 | restriction, including without limitation the rights to use, copy, 41 | modify, merge, publish, distribute, sublicense, and/or sell copies 42 | of the Software, and to permit persons to whom the Software is 43 | furnished to do so, subject to the following conditions: 44 | 45 | The above copyright notice and this permission notice shall be 46 | included in all copies or substantial portions of the Software. 47 | 48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 49 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 50 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 51 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 52 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 53 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 54 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE. 56 | ======================================================================== 57 | (END LICENSE TEXT) 58 | 59 | The MIT license is compatible with both the GPL and commercial 60 | software, affording one all of the rights of Public Domain with the 61 | minor nuisance of being required to keep the above copyright notice 62 | and license text in the source code. 
Note also that by accepting the 63 | Public Domain "license" you can re-license your copy using whatever 64 | license you like. 65 | 66 | */ 67 | 68 | // ////////////////////////////////////////////////////////////////////// 69 | // End of content of file: LICENSE 70 | // ////////////////////////////////////////////////////////////////////// 71 | 72 | 73 | 74 | 75 | 76 | #ifndef JSON_FORWARD_AMALGAMATED_H_INCLUDED 77 | # define JSON_FORWARD_AMALGAMATED_H_INCLUDED 78 | /// If defined, indicates that the source file is amalgamated 79 | /// to prevent private header inclusion. 80 | #define JSON_IS_AMALGAMATION 81 | 82 | // ////////////////////////////////////////////////////////////////////// 83 | // Beginning of content of file: include/json/config.h 84 | // ////////////////////////////////////////////////////////////////////// 85 | 86 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 87 | // Distributed under MIT license, or public domain if desired and 88 | // recognized in your jurisdiction. 89 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 90 | 91 | #ifndef JSON_CONFIG_H_INCLUDED 92 | #define JSON_CONFIG_H_INCLUDED 93 | #include 94 | #include 95 | #include 96 | #include 97 | #include 98 | #include 99 | #include 100 | #include 101 | 102 | /// If defined, indicates that json library is embedded in CppTL library. 103 | //# define JSON_IN_CPPTL 1 104 | 105 | /// If defined, indicates that json may leverage CppTL library 106 | //# define JSON_USE_CPPTL 1 107 | /// If defined, indicates that cpptl vector based map should be used instead of 108 | /// std::map 109 | /// as Value container. 110 | //# define JSON_USE_CPPTL_SMALLMAP 1 111 | 112 | // If non-zero, the library uses exceptions to report bad input instead of C 113 | // assertion macros. The default is to use exceptions. 
114 | #ifndef JSON_USE_EXCEPTION 115 | #define JSON_USE_EXCEPTION 1 116 | #endif 117 | 118 | // Temporary, tracked for removal with issue #982. 119 | #ifndef JSON_USE_NULLREF 120 | #define JSON_USE_NULLREF 1 121 | #endif 122 | 123 | /// If defined, indicates that the source file is amalgamated 124 | /// to prevent private header inclusion. 125 | /// Remarks: it is automatically defined in the generated amalgamated header. 126 | // #define JSON_IS_AMALGAMATION 127 | 128 | #ifdef JSON_IN_CPPTL 129 | #include 130 | #ifndef JSON_USE_CPPTL 131 | #define JSON_USE_CPPTL 1 132 | #endif 133 | #endif 134 | 135 | #ifdef JSON_IN_CPPTL 136 | #define JSON_API CPPTL_API 137 | #elif defined(JSON_DLL_BUILD) 138 | #if defined(_MSC_VER) || defined(__MINGW32__) 139 | #define JSON_API __declspec(dllexport) 140 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 141 | #elif defined(__GNUC__) || defined(__clang__) 142 | #define JSON_API __attribute__((visibility("default"))) 143 | #endif // if defined(_MSC_VER) 144 | #elif defined(JSON_DLL) 145 | #if defined(_MSC_VER) || defined(__MINGW32__) 146 | #define JSON_API __declspec(dllimport) 147 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 148 | #endif // if defined(_MSC_VER) 149 | #endif // ifdef JSON_IN_CPPTL 150 | #if !defined(JSON_API) 151 | #define JSON_API 152 | #endif 153 | 154 | #if defined(_MSC_VER) && _MSC_VER < 1800 155 | #error \ 156 | "ERROR: Visual Studio 12 (2013) with _MSC_VER=1800 is the oldest supported compiler with sufficient C++11 capabilities" 157 | #endif 158 | 159 | #if defined(_MSC_VER) && _MSC_VER < 1900 160 | // As recommended at 161 | // https://stackoverflow.com/questions/2915672/snprintf-and-visual-studio-2010 162 | extern JSON_API int 163 | msvc_pre1900_c99_snprintf(char* outBuf, size_t size, const char* format, ...); 164 | #define jsoncpp_snprintf msvc_pre1900_c99_snprintf 165 | #else 166 | #define jsoncpp_snprintf std::snprintf 167 | #endif 168 | 169 | // If JSON_NO_INT64 is defined, then Json only support C++ 
"int" type for 170 | // integer 171 | // Storages, and 64 bits integer support is disabled. 172 | // #define JSON_NO_INT64 1 173 | 174 | // JSONCPP_OVERRIDE is maintained for backwards compatibility of external tools. 175 | // C++11 should be used directly in JSONCPP. 176 | #define JSONCPP_OVERRIDE override 177 | 178 | #if __cplusplus >= 201103L 179 | #define JSONCPP_NOEXCEPT noexcept 180 | #define JSONCPP_OP_EXPLICIT explicit 181 | #elif defined(_MSC_VER) && _MSC_VER < 1900 182 | #define JSONCPP_NOEXCEPT throw() 183 | #define JSONCPP_OP_EXPLICIT explicit 184 | #elif defined(_MSC_VER) && _MSC_VER >= 1900 185 | #define JSONCPP_NOEXCEPT noexcept 186 | #define JSONCPP_OP_EXPLICIT explicit 187 | #else 188 | #define JSONCPP_NOEXCEPT throw() 189 | #define JSONCPP_OP_EXPLICIT 190 | #endif 191 | 192 | #if defined(__clang__) 193 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1 194 | #elif defined(__GNUC__) && (__GNUC__ >= 6) 195 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1 196 | #endif 197 | 198 | #if !defined(JSON_IS_AMALGAMATION) 199 | 200 | #include "allocator.h" 201 | #include "version.h" 202 | 203 | #endif // if !defined(JSON_IS_AMALGAMATION) 204 | 205 | namespace Json { 206 | typedef int Int; 207 | typedef unsigned int UInt; 208 | #if defined(JSON_NO_INT64) 209 | typedef int LargestInt; 210 | typedef unsigned int LargestUInt; 211 | #undef JSON_HAS_INT64 212 | #else // if defined(JSON_NO_INT64) 213 | // For Microsoft Visual use specific types as long long is not supported 214 | #if defined(_MSC_VER) // Microsoft Visual Studio 215 | typedef __int64 Int64; 216 | typedef unsigned __int64 UInt64; 217 | #else // if defined(_MSC_VER) // Other platforms, use long long 218 | typedef int64_t Int64; 219 | typedef uint64_t UInt64; 220 | #endif // if defined(_MSC_VER) 221 | typedef Int64 LargestInt; 222 | typedef UInt64 LargestUInt; 223 | #define JSON_HAS_INT64 224 | #endif // if defined(JSON_NO_INT64) 225 | 226 | template 227 | using Allocator = typename std::conditional, 229 | 
std::allocator>::type; 230 | using String = std::basic_string, Allocator>; 231 | using IStringStream = std::basic_istringstream; 234 | using OStringStream = std::basic_ostringstream; 237 | using IStream = std::istream; 238 | using OStream = std::ostream; 239 | } // namespace Json 240 | 241 | // Legacy names (formerly macros). 242 | using JSONCPP_STRING = Json::String; 243 | using JSONCPP_ISTRINGSTREAM = Json::IStringStream; 244 | using JSONCPP_OSTRINGSTREAM = Json::OStringStream; 245 | using JSONCPP_ISTREAM = Json::IStream; 246 | using JSONCPP_OSTREAM = Json::OStream; 247 | 248 | #endif // JSON_CONFIG_H_INCLUDED 249 | 250 | // ////////////////////////////////////////////////////////////////////// 251 | // End of content of file: include/json/config.h 252 | // ////////////////////////////////////////////////////////////////////// 253 | 254 | 255 | 256 | 257 | 258 | 259 | // ////////////////////////////////////////////////////////////////////// 260 | // Beginning of content of file: include/json/forwards.h 261 | // ////////////////////////////////////////////////////////////////////// 262 | 263 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 264 | // Distributed under MIT license, or public domain if desired and 265 | // recognized in your jurisdiction. 
266 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 267 | 268 | #ifndef JSON_FORWARDS_H_INCLUDED 269 | #define JSON_FORWARDS_H_INCLUDED 270 | 271 | #if !defined(JSON_IS_AMALGAMATION) 272 | #include "config.h" 273 | #endif // if !defined(JSON_IS_AMALGAMATION) 274 | 275 | namespace Json { 276 | 277 | // writer.h 278 | class StreamWriter; 279 | class StreamWriterBuilder; 280 | class Writer; 281 | class FastWriter; 282 | class StyledWriter; 283 | class StyledStreamWriter; 284 | 285 | // reader.h 286 | class Reader; 287 | class CharReader; 288 | class CharReaderBuilder; 289 | 290 | // json_features.h 291 | class Features; 292 | 293 | // value.h 294 | typedef unsigned int ArrayIndex; 295 | class StaticString; 296 | class Path; 297 | class PathArgument; 298 | class Value; 299 | class ValueIteratorBase; 300 | class ValueIterator; 301 | class ValueConstIterator; 302 | 303 | } // namespace Json 304 | 305 | #endif // JSON_FORWARDS_H_INCLUDED 306 | 307 | // ////////////////////////////////////////////////////////////////////// 308 | // End of content of file: include/json/forwards.h 309 | // ////////////////////////////////////////////////////////////////////// 310 | 311 | 312 | 313 | 314 | 315 | #endif //ifndef JSON_FORWARD_AMALGAMATED_H_INCLUDED 316 | -------------------------------------------------------------------------------- /src/plugin/UpsampleKernel.cu: -------------------------------------------------------------------------------- 1 | #include "UpsmapleKernel.h" 2 | 3 | 4 | /** 5 | * @brief caculate the number of cuda kernel for upsample. 
(Cite from: 《GPU高性能编程CUDA实战》P46,P47)
 *
 *        Implemented as a ceiling division: the result is the smallest count
 *        such that count * max_thread_num >= total_thread_num.
 *
 * @param total_thread_num: total number of CUDA threads wanted for the upsample
 * @param max_thread_num: divisor, e.g. the device's maxThreadsPerBlock; must be > 0
 * @return int ceil(total_thread_num / max_thread_num)
 */
int get_kernel_num(int total_thread_num, int max_thread_num)
{
    return (total_thread_num + max_thread_num - 1) / max_thread_num;
}

/**
 * @brief Return maxThreadsPerBlock of the CUDA device that is current on the
 *        calling thread.
 *
 * Both runtime calls are checked: if either fails, a conservative block size is
 * returned instead of reading an uninitialised cudaDeviceProp.
 *
 * @return int threads-per-block limit, or the fallback value on error
 */
int get_max_thread_num()
{
    // Fallback used when the device cannot be queried; chosen to be small enough
    // to be a valid block size (NOTE(review): confirm this suits the target GPUs).
    const int kFallbackThreadsPerBlock = 256;

    // Query the device this thread is actually using rather than assuming device 0.
    int device = 0;
    if (cudaGetDevice(&device) != cudaSuccess)
    {
        return kFallbackThreadsPerBlock;
    }

    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, device) != cudaSuccess)
    {
        return kFallbackThreadsPerBlock;
    }
    return prop.maxThreadsPerBlock;
}

/**
 * @brief Ratio input_size / output_size, computed in float.
 */
__host__ __forceinline__ float linear_upsampling_compute_scale(int input_size, int output_size)
{
    return float(input_size) / float(output_size);
}

/**
 * @brief Map a destination index to a (fractional) source coordinate using
 *        half-pixel centres: scale * (dst_index + 0.5) - 0.5, clamped below at 0.
 *
 * The 0.5 literals are doubles, so the expression is evaluated in double
 * precision before being narrowed to float; this is kept as-is so results do
 * not change. `intput_size` is not used by the computation.
 */
__device__ __forceinline__ float linear_upsampling_compute_source_index(float scale, int dst_index, int intput_size)
{
    float src_idx = scale * (dst_index + 0.5) - 0.5;
    return (src_idx >= 0) ? src_idx : 0;
}


/**
 * @brief Flatten a (batch, channel, height, width) index into a linear offset.
 *
 * @param batch_total   stride applied to batch_idx
 * @param channel_total stride applied to channel_idx
 * @param width         stride applied to height_idx
 * @return batch_idx * batch_total + channel_idx * channel_total
 *         + height_idx * width + width_idx
 */
__device__ __forceinline__ int get_index(const int batch_idx, const int channel_idx, const int height_idx, const int width_idx,
    const int batch_total, const int channel_total, const int width)
{
    int ret_idx = batch_idx * batch_total
                + channel_idx * channel_total
                + height_idx * width
                + width_idx;
    return ret_idx;
}

/**
 * @brief
 *
 * @tparam T
 * @param n
 * @param input_shape: input data shape.
such as [batch, channel, height, width] 51 | * @param rate_h 52 | * @param rate_w 53 | * @param inputs 54 | * @param outputs 55 | * @return __global__ BilinearKernel 56 | * @TODO: 57 | * 58 | */ 59 | 60 | 61 | template 62 | __global__ void BilinearKernel( 63 | const int n, 64 | int input_b, 65 | int input_c, 66 | int input_h, 67 | int input_w, 68 | int output_h, 69 | int output_w, 70 | const float rate_h, 71 | const float rate_w, 72 | const T* inputs, 73 | T* outputs) 74 | { 75 | 76 | int index = threadIdx.x + blockIdx.x * blockDim.x; 77 | if(index < n) 78 | { 79 | const int w2 = index % output_w; 80 | const int h2 = index / output_w; 81 | 82 | 83 | const float h1r = linear_upsampling_compute_source_index(rate_h, h2, input_h); 84 | const int h1 = int(h1r); 85 | const int h1p = (h1 < input_h - 1) ? 1 : 0; 86 | const float h1lambda = h1r - h1; 87 | const float h0lambda = 1 - h1lambda; 88 | 89 | const float w1r = linear_upsampling_compute_source_index(rate_w, w2, input_w); 90 | const int w1 = int(w1r); 91 | const int w1p = (w1 < input_w - 1) ? 
1 : 0; 92 | const float w1lambda = w1r - w1; 93 | const float w0lambda = 1 - w1lambda; 94 | 95 | int s_batch_total_1 = input_c * input_h * input_w; 96 | int s_channel_total_1 = input_h * input_w; 97 | 98 | int s_batch_total_2 = input_c * output_h * output_w; 99 | int s_channel_total_2 = output_h * output_w; 100 | 101 | 102 | const int batch_size = input_b; 103 | const int channel_size = input_c; 104 | 105 | for(int b_idx=0; b_idx<<< kernel_num, max_threads, 0, stream>>>(n,input_b,input_c,input_h,input_w, 141 | output_h, output_w, 142 | rate_h, rate_w, 143 | static_cast(inputs), 144 | static_cast(outputs)); 145 | return 0; 146 | } 147 | -------------------------------------------------------------------------------- /src/plugin/UpsamplePlugin.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "UpsmapleKernel.h" 3 | #include "UpsamplePlugin.h" 4 | 5 | #include 6 | #include 7 | 8 | using namespace nvinfer1; 9 | 10 | // Upsample plugin specific constants 11 | namespace { 12 | static const char* UPSAMPLE_PLUGIN_VERSION{"1"}; 13 | static const char* UPSAMPLE_PLUGIN_NAME{"UpsamplePlugin"}; 14 | } 15 | 16 | // Static class fields initialization 17 | PluginFieldCollection UpsamplePluginCreator::mFC{}; 18 | std::vector UpsamplePluginCreator::mPluginAttributes; 19 | 20 | REGISTER_TENSORRT_PLUGIN(UpsamplePluginCreator); 21 | 22 | template 23 | void writeToBuffer(char*& buffer, const T& val) 24 | { 25 | *reinterpret_cast(buffer) = val; 26 | buffer += sizeof(T); 27 | } 28 | 29 | // Helper function for deserializing plugin 30 | template 31 | T readFromBuffer(const char*& buffer) 32 | { 33 | T val = *reinterpret_cast(buffer); 34 | buffer += sizeof(T); 35 | return val; 36 | } 37 | 38 | UpsamplePlugin::UpsamplePlugin(const std::string name, float scale_h, float scale_w) 39 | : mLayerName(name) 40 | , mScaleFactor_h(scale_h) 41 | , mScaleFactor_w(scale_w) 42 | { 43 | mInputShape.c() = -1; 44 | mInputShape.h() = -1; 45 | 
mInputShape.w() = -1; 46 | mInputVolume = 0; 47 | } 48 | 49 | UpsamplePlugin::UpsamplePlugin(const std::string name, const void* data, size_t length) 50 | : mLayerName(name) 51 | { 52 | const char *d = static_cast(data); 53 | const char *a = d; 54 | 55 | mScaleFactor_h = readFromBuffer(d); 56 | mScaleFactor_w = readFromBuffer(d); 57 | mInputVolume = readFromBuffer(d); 58 | mInputShape.c() = readFromBuffer(d); 59 | mInputShape.h() = readFromBuffer(d); 60 | mInputShape.w() = readFromBuffer(d); 61 | 62 | assert(d == (a + length)); 63 | 64 | } 65 | 66 | const char* UpsamplePlugin::getPluginType() const 67 | { 68 | return UPSAMPLE_PLUGIN_NAME; 69 | } 70 | 71 | const char* UpsamplePlugin::getPluginVersion() const 72 | { 73 | return UPSAMPLE_PLUGIN_VERSION; 74 | } 75 | 76 | int UpsamplePlugin::getNbOutputs() const 77 | { 78 | return 1; 79 | } 80 | 81 | Dims UpsamplePlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 82 | { 83 | assert(index == 0); 84 | assert(nbInputDims == 1); 85 | assert(inputs[0].nbDims == 3); 86 | return nvinfer1::DimsCHW{inputs[0].d[0],int(inputs[0].d[1]*mScaleFactor_h), int(inputs[0].d[2]*mScaleFactor_w)}; 87 | } 88 | 89 | int UpsamplePlugin::initialize() 90 | { 91 | //printf("UpsamplePlugin::initialize\n"); 92 | return 0; 93 | } 94 | 95 | 96 | int UpsamplePlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) 97 | { 98 | //printf("UpsamplePlugin::enqueue\n"); 99 | int status = -1; 100 | 101 | // Our plugin outputs only one tensor 102 | void* output = outputs[0]; 103 | 104 | // Launch CUDA kernel wrapper and save its return value 105 | status = UpsampleInference(stream, mInputVolume, 106 | batchSize, mInputShape.c(), mInputShape.h(), mInputShape.w(), 107 | mScaleFactor_h,mScaleFactor_w, 108 | inputs[0], output); 109 | return status; 110 | } 111 | 112 | size_t UpsamplePlugin::getSerializationSize() const 113 | { 114 | //printf("UpsamplePlugin::getSerializationSize\n"); 115 | 
return sizeof(mScaleFactor_h) + sizeof(mScaleFactor_w) + 116 | sizeof(mInputVolume) + sizeof(mInputShape.c()) + 117 | sizeof(mInputShape.h()) + sizeof(mInputShape.w()); 118 | } 119 | 120 | 121 | void UpsamplePlugin::serialize(void* buffer) const 122 | { 123 | //printf("UpsamplePlugin::serialize\n"); 124 | char *d = static_cast(buffer); 125 | const char *a = d; 126 | 127 | writeToBuffer(d, mScaleFactor_h); 128 | writeToBuffer(d, mScaleFactor_w); 129 | writeToBuffer(d, mInputVolume); 130 | writeToBuffer(d, mInputShape.c()); 131 | writeToBuffer(d, mInputShape.h()); 132 | writeToBuffer(d, mInputShape.w()); 133 | 134 | assert(d == a + getSerializationSize()); 135 | } 136 | 137 | void UpsamplePlugin::configureWithFormat(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, DataType type, PluginFormat format, int) 138 | { 139 | assert(nbOutputs == 1); 140 | assert(type == DataType::kFLOAT); 141 | assert(format == PluginFormat::kNCHW); 142 | assert(inputs[0].nbDims == 3); 143 | 144 | size_t volume = int(inputs[0].d[1]*mScaleFactor_h) * int(inputs[0].d[2]*mScaleFactor_w); 145 | mInputVolume = volume; 146 | mInputShape.c() = inputs[0].d[0]; 147 | mInputShape.h() = inputs[0].d[1]; 148 | mInputShape.w() = inputs[0].d[2]; 149 | } 150 | 151 | bool UpsamplePlugin::supportsFormat(DataType type, PluginFormat format) const 152 | { 153 | if (type == DataType::kFLOAT && format == PluginFormat::kNCHW) 154 | return true; 155 | else 156 | return false; 157 | } 158 | 159 | void UpsamplePlugin::terminate() {} 160 | 161 | void UpsamplePlugin::destroy() { 162 | // This gets called when the network containing plugin is destroyed 163 | delete this; 164 | } 165 | 166 | IPluginV2* UpsamplePlugin::clone() const 167 | { 168 | return new UpsamplePlugin(mLayerName, mScaleFactor_h, mScaleFactor_w); 169 | } 170 | 171 | void UpsamplePlugin::setPluginNamespace(const char* libNamespace) 172 | { 173 | mNamespace = libNamespace; 174 | } 175 | 176 | const char* 
UpsamplePlugin::getPluginNamespace() const 177 | { 178 | return mNamespace.c_str(); 179 | } 180 | 181 | UpsamplePluginCreator::UpsamplePluginCreator() 182 | { 183 | mPluginAttributes.emplace_back(PluginField("scaleFactor", nullptr, PluginFieldType::kFLOAT32, 1)); 184 | 185 | mFC.nbFields = mPluginAttributes.size(); 186 | mFC.fields = mPluginAttributes.data(); 187 | } 188 | const char* UpsamplePluginCreator::getPluginName() const 189 | { 190 | return UPSAMPLE_PLUGIN_NAME; 191 | } 192 | 193 | const char* UpsamplePluginCreator::getPluginVersion() const 194 | { 195 | return UPSAMPLE_PLUGIN_VERSION; 196 | } 197 | 198 | const PluginFieldCollection* UpsamplePluginCreator::getFieldNames() 199 | { 200 | return &mFC; 201 | } 202 | 203 | IPluginV2* UpsamplePluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 204 | { 205 | float scaleFactor_h = 0.f; 206 | float scaleFactor_w = 0.f; 207 | const PluginField* fields = fc->fields; 208 | 209 | assert(fc->nbFields == 1); 210 | for (int i = 0; i < fc->nbFields; i++){ 211 | 212 | if (strcmp(fields[i].name, "scaleFactor") == 0) { 213 | assert(fields[i].type == PluginFieldType::kFLOAT32); 214 | scaleFactor_h = *(static_cast(fields[i].data)); 215 | scaleFactor_w = *(static_cast(fields[i].data)+1); 216 | //std::cout< 6 | #include 7 | 8 | 9 | using namespace nvinfer1; 10 | 11 | class UpsamplePlugin : public IPluginV2 12 | { 13 | public: 14 | UpsamplePlugin(const std::string name, float scale_h,float scale_w); 15 | 16 | UpsamplePlugin(const std::string name, const void* data, size_t length); 17 | 18 | // It doesn't make sense to make UpsamplePlugin without arguments, so we delete default constructor. 
19 | UpsamplePlugin() = delete; 20 | 21 | int getNbOutputs() const override; 22 | 23 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 24 | 25 | int initialize() override; 26 | 27 | void terminate() override; 28 | 29 | size_t getWorkspaceSize(int) const override { return 0; }; 30 | 31 | int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override; 32 | 33 | size_t getSerializationSize() const override; 34 | 35 | void serialize(void* buffer) const override; 36 | 37 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override; 38 | 39 | bool supportsFormat(DataType type, PluginFormat format) const override; 40 | 41 | const char* getPluginType() const override; 42 | 43 | const char* getPluginVersion() const override; 44 | 45 | void destroy() override; 46 | 47 | nvinfer1::IPluginV2* clone() const override; 48 | 49 | void setPluginNamespace(const char* pluginNamespace) override; 50 | 51 | const char* getPluginNamespace() const override; 52 | 53 | private: 54 | const std::string mLayerName; 55 | bool mAlignCorners; 56 | float mScaleFactor_h; 57 | float mScaleFactor_w; 58 | size_t mInputVolume; 59 | DimsCHW mInputShape; 60 | std::string mNamespace; 61 | }; 62 | 63 | class UpsamplePluginCreator : public IPluginCreator 64 | { 65 | public: 66 | UpsamplePluginCreator(); 67 | 68 | const char* getPluginName() const override; 69 | 70 | const char* getPluginVersion() const override; 71 | 72 | const PluginFieldCollection* getFieldNames() override; 73 | 74 | IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override; 75 | 76 | IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 77 | 78 | void setPluginNamespace(const char* pluginNamespace) override; 79 | 80 | const char* getPluginNamespace() const override; 81 | 82 
| private: 83 | static PluginFieldCollection mFC; 84 | static std::vector mPluginAttributes; 85 | std::string mNamespace; 86 | }; 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/plugin/UpsmapleKernel.h: -------------------------------------------------------------------------------- 1 | #ifndef UPSAMPLE_KERNEL_H 2 | #define UPSAMPLE_KERNEL_H 3 | 4 | #include 5 | #include "NvInfer.h" 6 | 7 | int UpsampleInference( 8 | cudaStream_t stream, 9 | int n, 10 | int input_b, 11 | int input_c, 12 | int input_h, 13 | int input_w, 14 | float scale_h, 15 | float scale_w, 16 | const void* inputs, 17 | void* outputs); 18 | 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/plugin/hardswish.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "hardswish.h" 3 | #include "utils.h" 4 | 5 | namespace nvinfer1 6 | { 7 | HardSwishPlugin::HardSwishPlugin() 8 | { 9 | } 10 | 11 | HardSwishPlugin::~HardSwishPlugin() 12 | { 13 | } 14 | 15 | // create the plugin at runtime from a byte stream 16 | HardSwishPlugin::HardSwishPlugin(const void* data, size_t length) 17 | { 18 | const char *d = reinterpret_cast(data), *a = d; 19 | read(d, mInputSize); 20 | assert(d == a + length); 21 | } 22 | 23 | void HardSwishPlugin::serialize(void* buffer) const 24 | { 25 | char* d = static_cast(buffer), *a = d; 26 | write(d, mInputSize); 27 | assert(d == a + getSerializationSize()); 28 | } 29 | 30 | size_t HardSwishPlugin::getSerializationSize() const 31 | { 32 | return sizeof(mInputSize); 33 | } 34 | 35 | int HardSwishPlugin::initialize() 36 | { 37 | return 0; 38 | } 39 | 40 | Dims HardSwishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 41 | { 42 | assert(nbInputDims == 1); 43 | assert(index == 0); 44 | return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]); 45 | } 46 | 47 | // Set plugin namespace 48 | 
// Caches the number of elements in one sample of the first input tensor,
// computed as the product of its first three dimensions. The remaining
// arguments (nbInput, out, nbOutput) are not consulted.
void HardSwishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
    const Dims& shape = in[0].dims;
    mInputSize = shape.d[0] * shape.d[1] * shape.d[2];
}
// Applies the hard-swish kernel to batchSize * mInputSize floats.
//   inputs    - array of device pointers; only inputs[0] is read
//   output    - device buffer receiving one float per input element
//   stream    - CUDA stream the kernel is queued on
//   batchSize - number of samples in the batch
void HardSwishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
    const int numElem = batchSize * mInputSize;

    // A grid with zero blocks is an invalid launch configuration; there is
    // nothing to compute in that case anyway.
    if (numElem <= 0)
        return;

    const int numBlocks = (numElem + mThreadCount - 1) / mThreadCount;

    // Launch on the caller-supplied stream. The previous code omitted the
    // stream argument, so the kernel ran on the default stream and was not
    // ordered with the rest of the work enqueued on `stream`.
    HardSwishKer<<<numBlocks, mThreadCount, 0, stream>>>(inputs[0], output, numElem);
}
HardSwishPluginCreator::getPluginVersion() const 157 | { 158 | return "1"; 159 | } 160 | 161 | const PluginFieldCollection* HardSwishPluginCreator::getFieldNames() 162 | { 163 | return &mFC; 164 | } 165 | 166 | IPluginV2IOExt* HardSwishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 167 | { 168 | HardSwishPlugin* obj = new HardSwishPlugin(); 169 | obj->setPluginNamespace(mNamespace.c_str()); 170 | return obj; 171 | } 172 | 173 | IPluginV2IOExt* HardSwishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 174 | { 175 | // This object will be deleted when the network is destroyed, which will 176 | // call MishPlugin::destroy() 177 | HardSwishPlugin* obj = new HardSwishPlugin(serialData, serialLength); 178 | obj->setPluginNamespace(mNamespace.c_str()); 179 | return obj; 180 | } 181 | 182 | } 183 | -------------------------------------------------------------------------------- /src/plugin/hardswish.h: -------------------------------------------------------------------------------- 1 | #ifndef _HARD_SWISH_LAYER_H 2 | #define _HARD_SWISH_LAYER_H 3 | 4 | #include 5 | #include 6 | #include "NvInfer.h" 7 | 8 | namespace nvinfer1 9 | { 10 | class HardSwishPlugin: public IPluginV2IOExt 11 | { 12 | public: 13 | explicit HardSwishPlugin(); 14 | HardSwishPlugin(const void* data, size_t length); 15 | ~HardSwishPlugin(); 16 | 17 | int getNbOutputs() const override 18 | { 19 | return 1; 20 | } 21 | 22 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 23 | int initialize() override; 24 | virtual void terminate() override {}; 25 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 26 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 27 | virtual size_t getSerializationSize() const override; 28 | virtual void serialize(void* buffer) const override; 29 | 30 | bool 
supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 31 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 32 | } 33 | 34 | const char* getPluginType() const override; 35 | const char* getPluginVersion() const override; 36 | void destroy() override; 37 | IPluginV2IOExt* clone() const override; 38 | void setPluginNamespace(const char* pluginNamespace) override; 39 | const char* getPluginNamespace() const override; 40 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; 41 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 42 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 43 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 44 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; 45 | void detachFromContext() override; 46 | 47 | void setInputSize(int s) { 48 | mInputSize = s; 49 | } 50 | 51 | private: 52 | void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1); 53 | int mThreadCount = 256; 54 | int mInputSize; 55 | const char* mPluginNamespace; 56 | }; 57 | 58 | class HardSwishPluginCreator : public IPluginCreator 59 | { 60 | public: 61 | HardSwishPluginCreator(); 62 | ~HardSwishPluginCreator() override = default; 63 | const char* getPluginName() const override; 64 | const char* getPluginVersion() const override; 65 | const PluginFieldCollection* getFieldNames() override; 66 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 67 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 68 | 69 | void setPluginNamespace(const char* libNamespace) override 70 | { 71 | 
// Builds a plugin from explicit parameters and uploads the anchors of every
// YOLO kernel to the device.
//   classCount  - number of object classes
//   netWidth    - network input width
//   netHeight   - network input height
//   maxOut      - maximum number of detections written per sample
//   vYoloKernel - one entry per detection scale (grid size plus anchors)
YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel)
    : mKernelCount(vYoloKernel.size()),
      mClassCount(classCount),
      mYoloV5NetWidth(netWidth),
      mYoloV5NetHeight(netHeight),
      mMaxOutObject(maxOut),
      mYoloKernel(vYoloKernel)
{
    // Pinned host table holding one device pointer per kernel.
    CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));

    // Each kernel owns CHECK_COUNT anchors, two floats apiece.
    const size_t anchorBytes = sizeof(float) * CHECK_COUNT * 2;
    for (int k = 0; k < mKernelCount; ++k)
    {
        CUDA_CHECK(cudaMalloc(&mAnchor[k], anchorBytes));
        CUDA_CHECK(cudaMemcpy(mAnchor[k], mYoloKernel[k].anchors, anchorBytes, cudaMemcpyHostToDevice));
    }
}
// Writes the plugin state into `buffer` in the exact order the
// deserializing constructor reads it back: six scalar fields followed by
// the raw YoloKernel array.
void YoloLayerPlugin::serialize(void* buffer) const
{
    char* const begin = static_cast<char*>(buffer);
    char* cursor = begin;

    // Scalar header.
    write(cursor, mClassCount);
    write(cursor, mThreadCount);
    write(cursor, mKernelCount);
    write(cursor, mYoloV5NetWidth);
    write(cursor, mYoloV5NetHeight);
    write(cursor, mMaxOutObject);

    // Kernel descriptions, copied as one contiguous blob.
    const auto kernelBytes = mKernelCount * sizeof(YoloKernel);
    memcpy(cursor, mYoloKernel.data(), kernelBytes);
    cursor += kernelBytes;

    // The bytes written must match what getSerializationSize() reports.
    assert(cursor == begin + getSerializationSize());
}
// Return true if output tensor is broadcast across a batch.
// This plugin always answers false; none of the arguments
// (outputIndex, inputIsBroadcasted, nbInputs) are inspected.
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
{
    return false;
}
// Creates an independent copy of this plugin. The copy is built through the
// parameter constructor (so it allocates and uploads its own anchor buffers)
// and shares this plugin's namespace pointer.
IPluginV2IOExt* YoloLayerPlugin::clone() const
{
    auto* duplicate = new YoloLayerPlugin(
        mClassCount,
        mYoloV5NetWidth,
        mYoloV5NetHeight,
        mMaxOutObject,
        mYoloKernel);
    duplicate->setPluginNamespace(mPluginNamespace);
    return duplicate;
}
// Runs the detection decode for every YOLO scale.
//   inputs    - one device pointer per entry of mYoloKernel
//   output    - device buffer; each sample owns outputElem floats, the first
//               of which is the detection counter
//   stream    - CUDA stream all work is queued on
//   batchSize - number of samples in the batch
void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize)
{
    const int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);

    // Reset the per-sample detection counter. This is queued on `stream` so it
    // is ordered with the kernels below instead of running on the default stream.
    for (int idx = 0; idx < batchSize; ++idx) {
        CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream));
    }

    for (unsigned int i = 0; i < mYoloKernel.size(); ++i)
    {
        const auto& yolo = mYoloKernel[i];
        const int numElem = yolo.width * yolo.height * batchSize;

        // Nothing to decode; this also avoids a zero-sized block and a
        // division by zero.
        if (numElem <= 0)
            continue;

        // Use a local block size. The previous code overwrote the member
        // mThreadCount, so one small launch permanently reduced the block size
        // of every later launch and changed the value that gets serialized.
        const int threads = numElem < mThreadCount ? numElem : mThreadCount;
        const int blocks = (numElem + threads - 1) / threads;

        CalDetection<<<blocks, threads, 0, stream>>>
            (inputs[i], output, numElem, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, yolo.width, yolo.height, (float *)mAnchor[i], mClassCount, outputElem);
    }
}
for (int j = 0; j < fields[i].length - 2; j++) { 295 | kernel.anchors[j] = tmp[j + 2]; 296 | } 297 | yolo_kernels[2 - (fields[i].name[8] - '1')] = kernel; 298 | } 299 | } 300 | YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, yolo_kernels); 301 | obj->setPluginNamespace(mNamespace.c_str()); 302 | return obj; 303 | } 304 | 305 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 306 | { 307 | // This object will be deleted when the network is destroyed, which will 308 | // call YoloLayerPlugin::destroy() 309 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 310 | obj->setPluginNamespace(mNamespace.c_str()); 311 | return obj; 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /src/plugin/yololayer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | /* 4 | * 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | using namespace std; 11 | namespace Yolo 12 | { 13 | static constexpr int CHECK_COUNT = 3; 14 | static constexpr float IGNORE_THRESH = 0.1f; 15 | struct YoloKernel 16 | { 17 | int width; 18 | int height; 19 | float anchors[CHECK_COUNT * 2]; 20 | }; 21 | 22 | static constexpr int LOCATIONS = 4; 23 | struct alignas(float) Detection { 24 | //center_x center_y w h 25 | float bbox[LOCATIONS]; 26 | float conf; // bbox_conf * cls_conf 27 | float class_id; 28 | }; 29 | } 30 | 31 | namespace nvinfer1 32 | { 33 | class YoloLayerPlugin : public IPluginV2IOExt 34 | { 35 | public: 36 | YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector& vYoloKernel); 37 | YoloLayerPlugin(const void* data, size_t length); 38 | ~YoloLayerPlugin(); 39 | 40 | int getNbOutputs() const override 41 | { 42 | return 1; 43 | } 44 | 45 | Dims getOutputDimensions(int index, const 
Dims* inputs, int nbInputDims) override; 46 | 47 | int initialize() override; 48 | 49 | virtual void terminate() override {}; 50 | 51 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } 52 | 53 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 54 | 55 | virtual size_t getSerializationSize() const override; 56 | 57 | virtual void serialize(void* buffer) const override; 58 | 59 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 60 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 61 | } 62 | 63 | const char* getPluginType() const override; 64 | 65 | const char* getPluginVersion() const override; 66 | 67 | void destroy() override; 68 | 69 | IPluginV2IOExt* clone() const override; 70 | 71 | void setPluginNamespace(const char* pluginNamespace) override; 72 | 73 | const char* getPluginNamespace() const override; 74 | 75 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; 76 | 77 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 78 | 79 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 80 | 81 | void attachToContext( 82 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 83 | 84 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; 85 | 86 | void detachFromContext() override; 87 | 88 | private: 89 | void forwardGpu(const float *const * inputs, float * output, cudaStream_t stream, int batchSize = 1); 90 | int mThreadCount = 256; 91 | const char* mPluginNamespace; 92 | int mKernelCount; 93 | int mClassCount; 94 | int mYoloV5NetWidth; 95 | int mYoloV5NetHeight; 96 | int mMaxOutObject; 97 | std::vector mYoloKernel; 98 
| void** mAnchor; 99 | }; 100 | 101 | class YoloPluginCreator : public IPluginCreator 102 | { 103 | public: 104 | YoloPluginCreator(); 105 | 106 | ~YoloPluginCreator() override = default; 107 | 108 | const char* getPluginName() const override; 109 | 110 | const char* getPluginVersion() const override; 111 | 112 | const PluginFieldCollection* getFieldNames() override; 113 | 114 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 115 | 116 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 117 | 118 | void setPluginNamespace(const char* libNamespace) override 119 | { 120 | mNamespace = libNamespace; 121 | } 122 | 123 | const char* getPluginNamespace() const override 124 | { 125 | return mNamespace.c_str(); 126 | } 127 | 128 | private: 129 | std::string mNamespace; 130 | static PluginFieldCollection mFC; 131 | static std::vector mPluginAttributes; 132 | }; 133 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 134 | }; 135 | 136 | #endif 137 | -------------------------------------------------------------------------------- /src/trt.h: -------------------------------------------------------------------------------- 1 | #ifndef TRT_H 2 | #define TRT_H 3 | 4 | 5 | #include 6 | #include "json.h" 7 | #include 8 | #include "NvOnnxParser.h" 9 | #include "NvOnnxConfig.h" 10 | #include "calibrator.h" 11 | #include "UpsamplePlugin.h" 12 | #include "yololayer.h" 13 | #include "hardswish.h" 14 | 15 | 16 | struct Param{ 17 | int input_c; 18 | int input_h; 19 | int input_w; 20 | bool createENG; 21 | string ENGPath; 22 | bool fp16; 23 | bool int8; 24 | bool Div_255; 25 | string cali_txt; 26 | string cali_table; 27 | vector mean; 28 | vector std; 29 | string weightPath; 30 | string wtsAllPath; 31 | string onnxPath; 32 | string inputBlobName; 33 | //string outputBlobName; 34 | int maxBatchsize; 35 | int outputSize = 0; 36 | bool doInfer; 37 | int BatchSize; 38 | string imgDir; 39 | string 
imgType; 40 | Json::Value layers; 41 | vector outputNames; 42 | }; 43 | 44 | class Logger:public nvinfer1::ILogger 45 | { 46 | public: 47 | void log(nvinfer1::ILogger::Severity severity, const char *msg) override 48 | { 49 | // suppress info-level messages 50 | if (severity == Severity::kINFO) 51 | return; 52 | 53 | switch (severity) 54 | { 55 | case Severity::kINTERNAL_ERROR: 56 | std::cerr << "INTERNAL_ERROR: "; 57 | break; 58 | case Severity::kERROR: 59 | std::cerr << "ERROR: "; 60 | break; 61 | case Severity::kWARNING: 62 | std::cerr << "WARNING: "; 63 | break; 64 | case Severity::kINFO: 65 | std::cerr << "INFO: "; 66 | break; 67 | default: 68 | std::cerr << "UNKNOWN: "; 69 | break; 70 | } 71 | std::cerr << msg << std::endl; 72 | } 73 | }; 74 | 75 | class trt 76 | { 77 | public: 78 | trt(const string &jsonPath); 79 | ~trt(); 80 | void debug_print(nvinfer1::ITensor *input_tensor,const string &head); 81 | void printWeight(Weights wts, int wtsSize); 82 | vector loadWeights(const string &filePath); 83 | void createENG(); 84 | void onnx2trt(); 85 | void addLayer(Json::Value layer); 86 | void inference_init(int batchsize); 87 | void doInference(const float *input, int batchsize, float *output); 88 | void doInference_int(const float *input, int batchsize, int *output); 89 | ITensor* trt_convNet(ITensor* input,string weightsPath,string biasFile, 90 | int output_c,DimsHW kernel,DimsHW stride = DimsHW{1,1}, 91 | DimsHW padding =DimsHW{0,0},DimsHW dilations =DimsHW{1,1}, 92 | int groups = 1,bool pre = false,bool post = false); 93 | ITensor* trt_deconvNet(ITensor* input,string weightsPath,string biasFile, 94 | int output_c,DimsHW kernel,DimsHW stride = DimsHW{1,1}, 95 | DimsHW padding =DimsHW{0,0},DimsHW dilations =DimsHW{1,1}, 96 | int groups = 1,bool pre = false,bool post = false); 97 | ITensor* trt_bnNet(ITensor* input, string weightsPath,float eps=1.0e-5); 98 | ITensor* trt_activeNet(ITensor* input,string acti_type,float alpha=0.0,float beta=0.0); 99 | ITensor* 
trt_poolNet(ITensor* input,string pooltype,DimsHW kernel,DimsHW stride,DimsHW padding); 100 | ITensor* trt_eltNet(ITensor* input1,ITensor* input2,string elt_Type); 101 | ITensor* conv_bn_active(ITensor* input,string weightsPath, int output_c,int k,int s=1,int p=0,int dilations=1,int groups=1, 102 | float eps=1e-5, string active_type="relu", float alpha=0.0, float beta=0.0); 103 | ITensor* trt_resnetCBA(Json::Value temp,ITensor* input); 104 | void trt_preInput(Json::Value layer); 105 | void trt_conv(Json::Value layer); 106 | void trt_deconv(Json::Value layer); 107 | void trt_padding(Json::Value layer); 108 | void trt_bn(Json::Value layer); 109 | void trt_active(Json::Value layer); 110 | void trt_pool(Json::Value layer); 111 | void trt_Pool(Json::Value layer); 112 | void trt_elt(Json::Value layer); 113 | void trt_fc(Json::Value layer); 114 | void trt_concat(Json::Value layer); 115 | void trt_slice(Json::Value layer); 116 | void trt_softmax(Json::Value layer); 117 | void trt_shuffle(Json::Value layer); 118 | void trt_matmul(Json::Value layer); 119 | void trt_topk(Json::Value layer); 120 | void trt_reduce(Json::Value layer); 121 | void trt_constant(Json::Value layer); 122 | void trt_pReLU(Json::Value layer); 123 | void trt_convBnActive(Json::Value layer); 124 | void trt_resnetLayer(Json::Value layer); 125 | void trt_resnet3(Json::Value layer); 126 | void trt_focus(Json::Value layer); 127 | void trt_UpSample(Json::Value layer); 128 | void trt_UpSample_plugin(Json::Value layer); 129 | void trt_groupNorm(Json::Value layer); 130 | void trt_unary(Json::Value layer); 131 | ITensor* convBlock(ITensor* input,int outch,int k,int s,string lname,string acti_type, 132 | float eps=1e-3,float alpha = 0.0); 133 | ITensor* bottleneck(ITensor* input, string lname,string acti_type,int c1, int c2, bool shortcut, float e, 134 | float eps=1e-3,float alpha = 0.0); 135 | ITensor* SPP(); 136 | void yolo_C3(Json::Value layer); 137 | void trt_yolo(Json::Value layer); 138 | void 
yolo_spp(Json::Value layer); 139 | int out_channel(int &out_c, int &n); 140 | void resnet(Json::Value layer); 141 | void hrnet_res(Json::Value layer); 142 | void hrnet_up(Json::Value layer); 143 | 144 | 145 | 146 | 147 | Param param; 148 | private: 149 | Logger m_logger; 150 | 151 | map Layers; 152 | INetworkDefinition *m_Network; //network 153 | vector m_bindings; 154 | void* temp; 155 | vector outputs; 156 | nvinfer1::IExecutionContext *m_context; 157 | cudaStream_t m_cudaStream; 158 | nvinfer1::ICudaEngine *m_engine; 159 | int inputIndex; 160 | int outputIndex; 161 | }; 162 | 163 | #endif // TRT_H 164 | -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | 4 | void mblobFromImages(cv::InputArrayOfArrays images_, cv::OutputArray blob_, 5 | cv::Size size, const cv::Scalar& mean_, const cv::Scalar& std_, bool swapRB, bool crop) 6 | { 7 | //CV_TRACE_FUNCTION(); 8 | std::vector images; 9 | images_.getMatVector(images); 10 | CV_Assert(!images.empty()); 11 | for (int i = 0; i < images.size(); i++) 12 | { 13 | cv::Size imgSize = images[i].size(); 14 | if (size == cv::Size()) 15 | size = imgSize; 16 | if (size != imgSize) 17 | { 18 | if (crop) 19 | { 20 | float resizeFactor = std::max(size.width / (float)imgSize.width, 21 | size.height / (float)imgSize.height); 22 | resize(images[i], images[i], cv::Size(), resizeFactor, resizeFactor, cv::INTER_LINEAR); 23 | cv::Rect crop(cv::Point(0.5 * (images[i].cols - size.width), 24 | 0.5 * (images[i].rows - size.height)), 25 | size); 26 | images[i] = images[i](crop); 27 | } 28 | else 29 | resize(images[i], images[i], size, 0, 0, cv::INTER_LINEAR); 30 | } 31 | if (images[i].depth() == CV_8U) 32 | images[i].convertTo(images[i], CV_32F); 33 | cv::Scalar mean = mean_; 34 | cv::Scalar std_num = std_; 35 | if (swapRB) 36 | { 37 | std::swap(mean[0], mean[2]); 38 | std::swap(std_num[0], 
std_num[2]); 39 | } 40 | 41 | images[i] -= mean; 42 | cv::divide(images[i], std_num, images[i]); 43 | } 44 | 45 | size_t i, nimages = images.size(); 46 | cv::Mat image0 = images[0]; 47 | int nch = image0.channels(); 48 | CV_Assert(image0.dims == 2); 49 | cv::Mat image; 50 | if (nch == 3 || nch == 4) 51 | { 52 | int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; 53 | blob_.create(4, sz, CV_32F); 54 | cv::Mat blob = blob_.getMat(); 55 | cv::Mat ch[4]; 56 | 57 | for (i = 0; i < nimages; i++) 58 | { 59 | image = images[i]; 60 | CV_Assert(image.depth() == CV_32F); 61 | nch = image.channels(); 62 | CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); 63 | CV_Assert(image.size() == image0.size()); 64 | 65 | for (int j = 0; j < nch; j++) 66 | ch[j] = cv::Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j)); 67 | if (swapRB) 68 | std::swap(ch[0], ch[2]); 69 | split(image, ch); 70 | } 71 | } 72 | else 73 | { 74 | CV_Assert(nch == 1); 75 | int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; 76 | blob_.create(4, sz, CV_32F); 77 | cv::Mat blob = blob_.getMat(); 78 | 79 | for (i = 0; i < nimages; i++) 80 | { 81 | cv::Mat image = images[i]; 82 | CV_Assert(image.depth() == CV_32F); 83 | nch = image.channels(); 84 | CV_Assert(image.dims == 2 && (nch == 1)); 85 | CV_Assert(image.size() == image0.size()); 86 | 87 | image.copyTo(cv::Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0))); 88 | } 89 | } 90 | } 91 | cv::Mat BlobFromImages(cv::InputArrayOfArrays images, cv::Size size, 92 | const cv::Scalar& mean, const cv::Scalar& std_num, bool swapRB, bool crop) 93 | { 94 | //CV_TRACE_FUNCTION(); 95 | cv::Mat blob; 96 | mblobFromImages(images, blob, size, mean, std_num, swapRB, crop); 97 | return blob; 98 | } 99 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | 
#ifndef CUDA_CHECK

// Evaluates a CUDA runtime call once and, on failure, reports the numeric
// error code, its description and the call site before asserting.
// The brace-block form is kept so existing call sites, with or without a
// trailing semicolon, continue to compile.
#define CUDA_CHECK(callstr)                                                   \
    {                                                                         \
        cudaError_t error_code = (callstr);                                   \
        if (error_code != cudaSuccess) {                                      \
            std::cerr << "CUDA error " << error_code << " ("                  \
                      << cudaGetErrorString(error_code) << ") at "            \
                      << __FILE__ << ":" << __LINE__ << std::endl;            \
            assert(0);                                                        \
        }                                                                     \
    }

#endif