├── CMakeLists.txt
├── README.md
├── example
    ├── CMakeLists.txt
    ├── ddrnet
    │   ├── CMakeLists.txt
    │   ├── ReadMe.md
    │   └── main.cpp
    ├── hrnet
    │   ├── CMakeLists.txt
    │   └── main.cpp
    └── yolov5
    │   ├── CMakeLists.txt
    │   └── main.cpp
├── getWeights.py
├── model
    ├── ddrnet.json
    ├── ddrnet
    │   └── ddrnet.json
    ├── deeplabV3_res50.json
    ├── hrnet
    │   ├── hrnet_w18.json
    │   ├── hrnet_w32.json
    │   └── hrnet_w48.json
    ├── resnet50.json
    └── yolo
    │   └── yolov5s.json
└── src
    ├── CMakeLists.txt1
    ├── calibrator.cpp
    ├── calibrator.h
    ├── json-forwards.h
    ├── json.h
    ├── jsoncpp.cpp
    ├── plugin
        ├── UpsampleKernel.cu
        ├── UpsamplePlugin.cpp
        ├── UpsamplePlugin.h
        ├── UpsmapleKernel.h
        ├── hardswish.cu
        ├── hardswish.h
        ├── yololayer.cu
        └── yololayer.h
    ├── trt.cpp
    ├── trt.h
    ├── utils.cpp
    └── utils.h


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Minimum CMake version: enable_language(CUDA) below needs CMake 3.8 or newer
 2 | cmake_minimum_required(VERSION 3.8)
 3 | 
 4 | # Project name
 5 | project(tensorrtF)
 6 | 
 7 | # Build C++ sources as C++11
 8 | set(CMAKE_CXX_STANDARD 11)
 9 | 
10 | # g++ compile options: C++11 with optimization level -O3
11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3")
12 | 
13 | # CUDA setup: option() takes a help string before the default value; the default stays OFF
14 | option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
15 | 
16 | # Windows: default OpenCV location, cached so that -DOpenCV_DIR=<path> can override it
17 | if(WIN32)
18 | add_definitions(-std=c++11)
19 | set(OpenCV_DIR "D:\\opencv\\opencv346\\build" CACHE PATH "Directory containing OpenCVConfig.cmake")
20 | endif(WIN32)
21 | 
22 | # UNIX: locate OpenCV, requesting version 3.4.3
23 | if(UNIX)
24 | find_package(OpenCV 3.4.3 REQUIRED)# REQUIRED stops the configure step immediately when the package is missing; without it CMake would carry on.
25 | endif(UNIX)
26 | 
27 | # TensorRT location, cached so that -DTENSORRT_DIR=<path> can override it
28 | set(TENSORRT_DIR "/home/zxl/TensorRT-7.2.3.4/targets/x86_64-linux-gnu" CACHE PATH "TensorRT directory containing include/ and lib/")
29 | 
30 | # Locate the CUDA toolkit
31 | find_package(CUDA REQUIRED)
32 | 
33 | # Enable CUDA as a language so that the .cu sources are compiled
34 | enable_language(CUDA)
35 | # Regular STATUS output
36 | message(STATUS "    libraries: ${CUDA_LIBRARIES}")
37 | message(STATUS "    include path: ${CUDA_INCLUDE_DIRS}")
38 | 
39 | # Add the CUDA headers to the include search path
40 | include_directories(${CUDA_INCLUDE_DIRS})
41 | 
42 | # nvcc options for sources built through enable_language(CUDA); add -g -G by hand for device debugging (-G disables device optimization)
43 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++11 -gencode arch=compute_86,code=sm_86")# originally 75
44 | 
45 | # -Wall shows all warnings; -Ofast enables every -O3 optimization plus some that do not strictly follow the language standard
46 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -D_MWAITXINTRIN_H_INCLUDED")
47 | 
48 | # Add the TensorRT headers to the include search path
49 | include_directories(${TENSORRT_DIR}/include)# originally used \\
50 | # Add the TensorRT libraries to the linker search path
51 | link_directories(${TENSORRT_DIR}/lib)# originally used \\
52 | 
53 | # Locate OpenCV; with every default search location disabled this relies on OpenCV_DIR
54 | find_package(OpenCV QUIET  # QUIET suppresses the informational messages of this command
55 |     NO_MODULE # skip Module mode and go straight to Config mode
56 |     NO_DEFAULT_PATH
57 |     NO_CMAKE_PATH # skip the CMake-specific cache variables
58 |     NO_CMAKE_ENVIRONMENT_PATH # skip the CMake-specific environment variables
59 |     NO_SYSTEM_ENVIRONMENT_PATH # skip the standard system environment variable PATH
60 |     NO_CMAKE_PACKAGE_REGISTRY # skip the paths stored in the CMake "User Package Registry"
61 |     NO_CMAKE_BUILDS_PATH
62 |     NO_CMAKE_SYSTEM_PATH # skip the CMake variables defined for the current system
63 |     NO_CMAKE_SYSTEM_PACKAGE_REGISTRY # skip the CMake "System Package Registry"
64 | )
65 | if(NOT OpenCV_FOUND)
66 |     message(FATAL_ERROR "OpenCV was not found; set OpenCV_DIR to the directory containing OpenCVConfig.cmake")
67 | endif()
68 | message(STATUS "OpenCV library status:")
69 | message(STATUS "    version: ${OpenCV_VERSION}")
70 | message(STATUS "    libraries: ${OpenCV_LIBS}")
71 | message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
72 | 
73 | # Add the OpenCV headers to the include search path
74 | include_directories(${OpenCV_INCLUDE_DIRS})
75 | 
76 | # Configure the example projects in the subdirectory
77 | add_subdirectory(example)
78 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## 项目介绍
 2 | 
 3 | * 零代码：只需配置文件（json文件）和权重文件即可生成engine文件，实现trt部署。
 4 | * 自动化生成配置文件：由pt模型文件可以自动化生成json文件。
 5 | * 可视化网络：便于查验和原始网络（比如pytorch）的区别。
 6 | * debug教程：方便对比trt输出和pytorch模型输出的区别，从而方便定位部署上的问题。
 7 | 
 8 | ## 项目结构
 9 | 
10 | ```
11 | tensorrtCV
12 |     |
13 |     |- src
14 |     |  |
15 |     |  |- plugin
16 |     |  |  |-xxx.h xxx.cu
17 |     |  |-xxx.cpp
18 |     |  |-xxx.h
19 |     |- example
20 |     |  |
21 |     |  |-ddrnet
22 |     |  | |-main.cpp
23 |     |
24 |     |  |-yolov5
25 |     |  | |-main.cpp
26 |     |
27 |     |- model
28 |     |  |-xxx.json
29 | ```
30 | 
31 | * `src` 为tensorrt项目的主代码目录。其中包含`plugin`目录，存放各种plugin。
32 | * `example`目录中为不同网络的demo代码
33 | * `model`目录中为各种网络的json文件。
34 | 
35 | ## 编译运行
36 | 
37 | ### win/linux
38 | 
39 | 统一使用cmake管理，**需要在CMakeLists中手动修改 `OpenCV_DIR` 和 `TENSORRT_DIR`**
40 | 
41 | 编译命令：
42 | 
43 | ```bash
44 | mkdir build
45 | cd build
46 | cmake ..
47 | # win下指定vs编译
48 | # cmake .. -G "Visual Studio 15 2017 Win64"
49 | make
50 | # win下用vs打开tensorrtF.sln编译运行
51 | ```
52 | 
53 | ### 自动化生成json文件
54 | 
55 | * [pytorch-classification](https://github.com/AlfengYuan/pytorch-classification)
56 | 
57 | ## Coming Soon
58 | 
59 | - [ ] 小白系列教程
60 | - [ ] json网络可视化
61 | 
62 | **欢迎各位同学PR模型配置(json文件)和新功能。**
63 | **另外，请关注我的微信公众号（CV加速器），定期有直播讲解整个工程和集中回答问题。**
64 | 
65 | 


--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Minimum required CMake version
2 | cmake_minimum_required(VERSION 3.2)
3 | foreach(example_dir ddrnet hrnet yolov5)
4 |     add_subdirectory(${example_dir})
5 | endforeach()
6 | 
7 | 


--------------------------------------------------------------------------------
/example/ddrnet/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Minimum required CMake version
 2 | cmake_minimum_required(VERSION 3.2)
 3 | # Project name
 4 | project(ddrnet)
 5 | 
 6 | # Print the top-level source directory (e.g. "/home/zxl/mywork/tensorrtCV") and this project's source directory
 7 | message("=========", ${CMAKE_SOURCE_DIR})
 8 | message("=========", ${PROJECT_SOURCE_DIR})
 9 | 
10 | # The shared wrapper sources live in <top-level>/src (e.g. "/home/zxl/mywork/tensorrtCV/src")
11 | set(TRT_SRC_DIR "${CMAKE_SOURCE_DIR}/src")
12 | # Collect the wrapper and plugin files into the list variable TRT_FILES
13 | file(GLOB TRT_FILES
14 |     "${TRT_SRC_DIR}/*.h" "${TRT_SRC_DIR}/*.cpp"
15 |     "${TRT_SRC_DIR}/plugin/*.cu" "${TRT_SRC_DIR}/plugin/*.h" "${TRT_SRC_DIR}/plugin/*.cpp")
16 | 
17 | # Add the wrapper directory to the header search path
18 | include_directories(${TRT_SRC_DIR})
19 | 
20 | # SOURCE_FILES holds this example's own source, main.cpp
21 | set(SOURCE_FILES main.cpp)
22 | 
23 | # Build the ddrnet executable
24 | add_executable(ddrnet ${SOURCE_FILES} ${TRT_FILES})
25 | 
26 | # Link the executable against the TensorRT, CUDA runtime and OpenCV libraries
27 | target_link_libraries(ddrnet
28 |     nvonnxparser
29 |     nvinfer
30 |     cudart
31 |     ${OpenCV_LIBS})
32 | 


--------------------------------------------------------------------------------
/example/ddrnet/ReadMe.md:
--------------------------------------------------------------------------------
1 | * 使用的模型为 ddrnet23-slim
2 | * ddrnet仓库地址 https://github.com/chenjun2hao/DDRNet.pytorch
3 | 
4 | 


--------------------------------------------------------------------------------
/example/ddrnet/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "trt.h"
  2 | #include "utils.h"
  3 | #include "opencv2/opencv.hpp"
  4 | // Builds a 1x256 CV_8U lookup table whose entry j is j * (256 / len) (integer division), capped at 255.
  5 | cv::Mat createLTU(int len) {
  6 | 	const int scale = 256 / len;
  7 | 	cv::Mat table(1, 256, CV_8U);
  8 | 	for (int value = 0; value < 256; ++value) {
  9 | 		table.data[value] = (value * scale > 255) ? uchar(255) : (uchar)(value * scale);
 10 | 	}
 11 | 	return table;
 12 | }
 13 | // Smoke test: builds the engine described by a fixed JSON file, feeds the values 1..100 as input and prints 56 outputs, 8 per row.
 14 | void test()
 15 | {
 16 |     string jsonPath = "D:/hrnet_ocr/test.json";
 17 |     trt *m_trt = new trt(jsonPath);
 18 |     m_trt->createENG();
 19 |     int batchsize = 1;
 20 |     m_trt->inference_init(batchsize, m_trt->getOutDim());
 21 |     // Vectors own the host buffers, so they are released when the function returns.
 22 |     vector<float> a(batchsize*100);
 23 |     for(int i = 0; i < batchsize*100; i++)
 24 |         a[i] = i*1.0 +1;
 25 |     vector<float> out(batchsize*56);
 26 |     m_trt->doInference(a.data(),batchsize,out.data());
 27 |     for(int i = 0;i<56;i++)
 28 |     {
 29 |         cout<<out[i]<<" ";
 30 |         if(i%8 == 7) cout<<endl;
 31 |     }
 32 | }
 33 | // Constructs the wrapper from the JSON config at JsonPath and builds the engine; no inference is run.
 34 | void resnet(string JsonPath) {
 35 |     trt *engineBuilder = new trt(JsonPath);
 36 |     engineBuilder->createENG();
 37 | }
 38 | 
 39 | // Segmentation demo driven by the JSON config at jsonPath.
 40 | // With param.createENG set it builds the engine, writes "<name>_infer.json" (outputSize filled in,
 41 | // createENG set to false) and returns. Otherwise it runs up to BatchSize images from param.imgDir
 42 | // through the engine and displays the first image's output map in false color.
 43 | void HRnet(std::string jsonPath)
 44 | {
 45 | 	trt *m_trt = new trt(jsonPath);
 46 | 	if(m_trt->param.createENG)
 47 | 	{
 48 | 		m_trt->createENG();
 49 | 		m_trt->param.outputSize = m_trt->getOutDim();
 50 | 		std::cout <<"Create engine..."<< std::endl;
 51 | 		std::cout <<"outputSize=" << m_trt->getOutDim() << std::endl;
 52 | 		Json::Reader m_Reader;
 53 | 		Json::Value root;
 54 | 		ifstream fp(jsonPath, ios::binary);
 55 | 		if (!fp.is_open() || !m_Reader.parse(fp, root))
 56 | 		{
 57 | 			std::cout << "Can not parse json file : " << jsonPath << std::endl;
 58 | 			return;
 59 | 		}
 60 | 		fp.close();
 61 | 
 62 | 		Json::StyledWriter writer;
 63 | 		root["outputSize"] = m_trt->getOutDim();
 64 | 		root["createENG"] = false;
 65 | 		// std::string::replace throws on npos, so a path without ".json" just gets the suffix appended.
 66 | 		auto replacepos = jsonPath.find(".json");
 67 | 		string inferjsonPath = (replacepos == string::npos) ? jsonPath + "_infer.json" : jsonPath.replace(replacepos, 5, "_infer.json");
 68 | 		ofstream os(inferjsonPath, ios::binary);
 69 | 		os << writer.write(root);
 70 | 		os.close();
 71 | 		return;
 72 | 	}
 73 | 	int batchsize = m_trt->param.BatchSize;
 74 | 	m_trt->inference_init(batchsize, m_trt->param.outputSize);
 75 | 
 76 | 	vector<cv::Mat> testVal;
 77 | 	string pattern = m_trt->param.imgDir+ "*."+m_trt->param.imgType;
 78 | 	vector<cv::String> images_names;
 79 | 	cv::glob(pattern, images_names, false);
 80 | 	if(images_names.empty())
 81 | 	{
 82 | 		std::cout << "No img files " << std::endl;
 83 | 		return;
 84 | 	}
 85 | 	cv::Scalar Mean = cv::Scalar(m_trt->param.mean[0], m_trt->param.mean[1], m_trt->param.mean[2]);
 86 | 	cv::Scalar Std = cv::Scalar(m_trt->param.std[0], m_trt->param.std[1], m_trt->param.std[2]);
 87 | 	// cv::Size takes (width, height).
 88 | 	cv::Size size = { m_trt->param.input_w, m_trt->param.input_h };
 89 | 	cout << size << "batch size= " << batchsize << endl;
 90 | 	int flag = (m_trt->param.input_c == 3) ? 1 : 0;
 91 | 	for (const auto& image_name : images_names)
 92 | 	{
 93 | 		// Stop once the batch is full; unreadable files do not take a slot.
 94 | 		if ((int)testVal.size() >= batchsize)
 95 | 			break;
 96 | 		cv::Mat Img = cv::imread(image_name, flag);
 97 | 		if (Img.empty())
 98 | 		{
 99 | 			std::cout << "Can not read image : " << image_name << std::endl;
100 | 			continue;
101 | 		}
102 | 		if (flag == 1)
103 | 		{
104 | 			cv::Mat img;
105 | 			Img.convertTo(img, CV_32FC3, 1 / 255.0);
106 | 			testVal.push_back(img);
107 | 		}
108 | 		else {
109 | 			testVal.push_back(Img);
110 | 		}
111 | 		cout << image_name << endl;
112 | 	}
113 | 	if (testVal.empty())
114 | 		return;
115 | 
116 | 	std::cout << "out dim : " << m_trt->param.outputSize << std::endl;
117 | 	// Vectors zero-fill the host buffers and release them on every return path.
118 | 	const size_t inputCount = (size_t)batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w;
119 | 	vector<float> data(inputCount);
120 | 	vector<int> output((size_t)batchsize*m_trt->param.outputSize);
121 | 
122 | 	cv::Mat Transed_t = BlobFromImages(testVal, size, Mean, Std, true, false);
123 | 	// Copy only what the blob holds: it may contain fewer than batchsize images.
124 | 	const size_t blobBytes = Transed_t.total() * Transed_t.elemSize();
125 | 	const size_t dataBytes = inputCount * sizeof(float);
126 | 	memcpy(data.data(), Transed_t.data, blobBytes < dataBytes ? blobBytes : dataBytes);
127 | 
128 | 	std::cout<<"param.outputSize = " << m_trt->param.outputSize << std::endl;
129 | 	m_trt->doInference_int(data.data(), batchsize, output.data());
130 | 
131 | 	// Post-processing reads input_h*input_w values, so make sure the output is that large.
132 | 	if (output.size() < (size_t)m_trt->param.input_h * m_trt->param.input_w)
133 | 	{
134 | 		std::cout << "outputSize is smaller than input_h*input_w" << std::endl;
135 | 		return;
136 | 	}
137 | 	cv::Mat outimg(m_trt->param.input_h, m_trt->param.input_w, CV_8UC1);
138 | 	for (int row = 0; row < m_trt->param.input_h; ++row) {
139 | 	    uchar* uc_pixel = outimg.data + row * outimg.step;
140 | 	    for (int col = 0; col < m_trt->param.input_w; ++col) {
141 | 	        uc_pixel[col] = (uchar)output[row*m_trt->param.input_w + col];
142 | 	    }
143 | 	}
144 | 	cv::Mat im_color;
145 | 	cv::cvtColor(outimg, im_color, cv::COLOR_GRAY2RGB);
146 | 	cv::Mat lut = createLTU(19); // numclass
147 | 	cv::LUT(im_color, lut, im_color);
148 | 	// false color
149 | 	cv::cvtColor(im_color, im_color, cv::COLOR_RGB2GRAY);
150 | 	cv::applyColorMap(im_color, im_color, cv::COLORMAP_HOT);
151 | 	cv::imshow("False Color Map", im_color);
152 | 	cv::waitKey(0);
153 | }
154 | 
155 | // Entry point: the first command-line argument is the JSON config path passed on to HRnet().
156 | int main(int argc ,char** argv)
157 | {
158 | 	const bool hasConfigPath = (argc >= 2);
159 | 	if(hasConfigPath)
160 | 	{
161 | 		HRnet(argv[1]);
162 | 		return 0;
163 | 	}
164 | 
165 | 	// No config path was given: report it and exit with a failure code.
166 | 	std::cout << "Parameter error !" << std::endl;
167 | 	return -1;
168 | }
169 | 


--------------------------------------------------------------------------------
/example/hrnet/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.2)
 2 | project(hrnet)
 3 | 
 4 | # Print the top-level source directory and this project's source directory
 5 | message("=========", ${CMAKE_SOURCE_DIR})
 6 | message("=========", ${PROJECT_SOURCE_DIR})
 7 | 
 8 | # Collect the shared wrapper and plugin files from <top-level>/src into TRT_FILES
 9 | set(TRT_SRC_DIR "${CMAKE_SOURCE_DIR}/src")
10 | file(GLOB TRT_FILES
11 |     "${TRT_SRC_DIR}/*.h" "${TRT_SRC_DIR}/*.cpp"
12 |     "${TRT_SRC_DIR}/plugin/*.cu" "${TRT_SRC_DIR}/plugin/*.h" "${TRT_SRC_DIR}/plugin/*.cpp")
13 | include_directories(${TRT_SRC_DIR})
14 | 
15 | # Build the hrnet executable from main.cpp plus the wrapper files
16 | set(SOURCE_FILES main.cpp)
17 | add_executable(hrnet  ${SOURCE_FILES} ${TRT_FILES})
18 | 
19 | # Link against the TensorRT, CUDA runtime and OpenCV libraries
20 | target_link_libraries(hrnet
21 |     nvonnxparser
22 |     nvinfer
23 |     cudart
24 |     ${OpenCV_LIBS})
25 | 


--------------------------------------------------------------------------------
/example/hrnet/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "trt.h"
  2 | #include "utils.h"
  3 | #include "opencv2/opencv.hpp"
  4 | // Returns a 1x256 CV_8U lookup table: entry j is j * (256 / len) using integer division, capped at 255.
  5 | cv::Mat createLTU(int len) {
  6 | 	cv::Mat lookUpTable(1, 256, CV_8U);
  7 | 	const int step = 256 / len;
  8 | 	for (int j = 0; j < 256; ++j) {
  9 | 		lookUpTable.data[j] = (j * step <= 255) ? (uchar)(j * step) : uchar(255);
 10 | 	}
 11 | 	return lookUpTable;
 12 | }
 13 | // HRNet segmentation demo driven by the JSON config at jsonPath: builds the engine when
 14 | // param.createENG is set, runs up to BatchSize images from param.imgDir through the engine
 15 | // and displays the first image's output map in false color.
 16 | void HRnet(std::string jsonPath)
 17 | {
 18 | 	trt *m_trt = new trt(jsonPath);
 19 | 	if(m_trt->param.createENG)
 20 | 	{
 21 | 		m_trt->createENG();
 22 | 	}
 23 | 
 24 | 	int batchsize = m_trt->param.BatchSize;
 25 | 	m_trt->inference_init(batchsize);
 26 | 	std::cout << "out dim : " << m_trt->param.outputSize << std::endl;
 27 | 	vector<cv::Mat> testVal;
 28 | 	string pattern = m_trt->param.imgDir+ "*."+m_trt->param.imgType;
 29 | 	vector<cv::String> images_names;
 30 | 	cv::glob(pattern, images_names, false);
 31 | 	if(images_names.empty())
 32 | 	{
 33 | 		std::cout << "No img files " << std::endl;
 34 | 		return;
 35 | 	}
 36 | 	// Network input size; cv::Size takes (width, height).
 37 | 	cv::Size size = { m_trt->param.input_w, m_trt->param.input_h };
 38 | 	cout << size << "batch size= " << batchsize << endl;
 39 | 	int flag = (m_trt->param.input_c == 3) ? 1 : 0;
 40 | 	// Vectors zero-fill the host buffers and release them on every return path.
 41 | 	const size_t inputCount = (size_t)batchsize*m_trt->param.input_c*m_trt->param.input_h*m_trt->param.input_w;
 42 | 	vector<float> data(inputCount);
 43 | 	vector<int> output((size_t)batchsize*m_trt->param.outputSize);
 44 | 	for (const auto& image_name : images_names)
 45 | 	{
 46 | 		if ((int)testVal.size() >= batchsize)
 47 | 			break;
 48 | 		cv::Mat Img = cv::imread(image_name, flag);
 49 | 		if (Img.empty())
 50 | 		{
 51 | 			std::cout << "Can not read image : " << image_name << std::endl;
 52 | 			continue;
 53 | 		}
 54 | 		if (flag == 1)
 55 | 		{
 56 | 			cv::Mat img;
 57 | 			Img.convertTo(img, CV_32FC3);
 58 | 			testVal.push_back(img);
 59 | 		}
 60 | 		else {
 61 | 			testVal.push_back(Img);
 62 | 		}
 63 | 		cout << image_name << endl;
 64 | 	}
 65 | 	if (testVal.empty())
 66 | 		return;
 67 | 
 68 | 	cv::Mat Transed_t = cv::dnn::blobFromImages(testVal,1.0,size,cv::Scalar{0},true);//BGR -> RGB
 69 | 	// Copy only what the blob holds: with fewer images than batchsize it is smaller than data.
 70 | 	const size_t blobBytes = Transed_t.total() * Transed_t.elemSize();
 71 | 	memcpy(data.data(), Transed_t.data, blobBytes < inputCount * sizeof(float) ? blobBytes : inputCount * sizeof(float));
 72 | 
 73 | 	m_trt->doInference_int(data.data(), batchsize, output.data());
 74 | 
 75 | 	// Post-processing reads input_h*input_w values, so make sure the output is that large.
 76 | 	if (output.size() < (size_t)m_trt->param.input_h * m_trt->param.input_w)
 77 | 	{
 78 | 		std::cout << "outputSize is smaller than input_h*input_w" << std::endl;
 79 | 		return;
 80 | 	}
 81 | 	cv::Mat outimg(m_trt->param.input_h, m_trt->param.input_w, CV_8UC1);
 82 | 	for (int row = 0; row < m_trt->param.input_h; ++row) {
 83 | 	    uchar* uc_pixel = outimg.data + row * outimg.step;
 84 | 	    for (int col = 0; col < m_trt->param.input_w; ++col) {
 85 | 	        uc_pixel[col] = (uchar)output[row*m_trt->param.input_w + col];
 86 | 	    }
 87 | 	}
 88 | 	cv::Mat im_color;
 89 | 	cv::cvtColor(outimg, im_color, cv::COLOR_GRAY2RGB);
 90 | 	cv::Mat lut = createLTU(19); // numclass
 91 | 	cv::LUT(im_color, lut, im_color);
 92 | 	// false color
 93 | 	cv::cvtColor(im_color, im_color, cv::COLOR_RGB2GRAY);
 94 | 	cv::applyColorMap(im_color, im_color, cv::COLORMAP_HOT);
 95 | 	cv::imshow("False Color Map", im_color);
 96 | 	cv::waitKey(0);
 97 | }
 98 | 
 99 | // Entry point: the first command-line argument is the JSON config path handed to HRnet().
100 | int main(int argc ,char** argv)
101 | {
102 | 	if(argc >= 2)
103 | 	{
104 | 		HRnet(argv[1]);
105 | 		return 0;
106 | 	}
107 | 	std::cout << "Parameter error !" << std::endl;
108 | 	return -1;
109 | }
110 | 


--------------------------------------------------------------------------------
/example/yolov5/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Minimum required CMake version
 2 | cmake_minimum_required(VERSION 3.2)
 3 | # Project name
 4 | project(yolov5)
 5 | 
 6 | # Print the top-level source directory (e.g. "/home/zxl/mywork/tensorrtCV") and this project's source directory
 7 | message("=========", ${CMAKE_SOURCE_DIR})
 8 | message("=========", ${PROJECT_SOURCE_DIR})
 9 | 
10 | # The shared wrapper sources live in <top-level>/src
11 | set(TRT_SRC_DIR "${CMAKE_SOURCE_DIR}/src")
12 | # Collect the wrapper and plugin files into the list variable TRT_FILES
13 | file(GLOB TRT_FILES
14 |     "${TRT_SRC_DIR}/*.h" "${TRT_SRC_DIR}/*.cpp"
15 |     "${TRT_SRC_DIR}/plugin/*.cu" "${TRT_SRC_DIR}/plugin/*.h" "${TRT_SRC_DIR}/plugin/*.cpp")
16 | 
17 | # Add the wrapper and plugin directories to the header search path
18 | include_directories(${TRT_SRC_DIR}
19 |     ${TRT_SRC_DIR}/plugin)
20 | 
21 | # SOURCE_FILES holds this example's own source, main.cpp
22 | set(SOURCE_FILES main.cpp)
23 | 
24 | # Build the yolov5 executable
25 | add_executable(yolov5 ${SOURCE_FILES} ${TRT_FILES})
26 | 
27 | # Link the executable against the TensorRT, CUDA runtime and OpenCV libraries
28 | target_link_libraries(yolov5
29 |     nvonnxparser
30 |     nvinfer
31 |     cudart
32 |     ${OpenCV_LIBS})
33 | 


--------------------------------------------------------------------------------
/example/yolov5/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "trt.h"
  2 | #include "utils.h"
  3 | #include "yololayer.h"
  4 | #include <opencv2/opencv.hpp>
  5 | #include <chrono>
  6 | //windows下
  7 | #ifdef WIN32
  8 | #include <direct.h>
  9 | #include <io.h>
 10 | #endif
 11 | //linux下
 12 | #ifdef linux
 13 | #include <dirent.h>
 14 | #include <sys/io.h>
 15 | #include <sys/stat.h>
 16 | #include <unistd.h>
 17 | #endif
 18 | // Converts a detection box from network-input coordinates back to the coordinates of img.
 19 | // bbox is read as (center x, center y, width, height); the padding that results from an
 20 | // aspect-preserving resize to INPUT_W x INPUT_H is subtracted, then the resize scale is undone.
 21 | // Intermediate values are truncated to int at each assignment.
 22 | cv::Rect get_rect(cv::Mat& img, float bbox[4], int& INPUT_W, int& INPUT_H) {
 23 |     const float r_w = INPUT_W / (img.cols * 1.0);
 24 |     const float r_h = INPUT_H / (img.rows * 1.0);
 25 |     // The smaller ratio is the scale that was applied; the other axis carries the padding.
 26 |     const bool pad_rows = r_h > r_w;
 27 |     const float scale = pad_rows ? r_w : r_h;
 28 |     const float pad_x = pad_rows ? 0.f : (INPUT_W - r_h * img.cols) / 2;
 29 |     const float pad_y = pad_rows ? (INPUT_H - r_w * img.rows) / 2 : 0.f;
 30 | 
 31 |     // Box edges in network-input pixels with the padding removed.
 32 |     int left   = bbox[0] - bbox[2] / 2.f - pad_x;
 33 |     int right  = bbox[0] + bbox[2] / 2.f - pad_x;
 34 |     int top    = bbox[1] - bbox[3] / 2.f - pad_y;
 35 |     int bottom = bbox[1] + bbox[3] / 2.f - pad_y;
 36 | 
 37 |     // Undo the resize.
 38 |     left = left / scale;
 39 |     right = right / scale;
 40 |     top = top / scale;
 41 |     bottom = bottom / scale;
 42 | 
 43 |     return cv::Rect(left, top, right - left, bottom - top);
 44 | }
 45 | // Intersection over union of two boxes given as (center x, center y, width, height); 0 when they do not overlap.
 46 | float iou(float lbox[4], float rbox[4]) {
 47 |     const float left   = (std::max)(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
 48 |     const float right  = (std::min)(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
 49 |     const float top    = (std::max)(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
 50 |     const float bottom = (std::min)(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);
 51 | 
 52 |     const bool overlaps = !(top > bottom || left > right);
 53 |     if (!overlaps) {
 54 |         return 0.0f;
 55 |     }
 56 | 
 57 |     const float inter_area = (right - left) * (bottom - top);
 58 |     return inter_area / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - inter_area);
 59 | }
 60 | // Sort predicate: true when a has the higher confidence, giving descending order by conf.
 61 | bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
 62 |     return b.conf < a.conf;
 63 | }
 64 | // Per-class non-maximum suppression: appends to res the detections that pass conf_thresh and survive the IoU test.
 65 | void nms(std::vector<Yolo::Detection>& res, float* output, int& MAX_OUTPUT_BBOX_COUNT, float conf_thresh, float nms_thresh = 0.5) {
 66 |     // output[0] is used as the detection count; packed Yolo::Detection records follow it.
 67 |     const int det_size = sizeof(Yolo::Detection) / sizeof(float);
 68 |     std::map<float, std::vector<Yolo::Detection>> by_class;
 69 |     for (int i = 0; i < output[0] && i < MAX_OUTPUT_BBOX_COUNT; i++) {
 70 |         const float* record = &output[1 + det_size * i];
 71 |         if (record[4] <= conf_thresh) continue;
 72 |         Yolo::Detection det;
 73 |         memcpy(&det, record, det_size * sizeof(float));
 74 |         // operator[] creates the bucket for a class the first time it is seen.
 75 |         by_class[det.class_id].push_back(det);
 76 |     }
 77 |     for (auto& entry : by_class) {
 78 |         auto& dets = entry.second;
 79 |         std::sort(dets.begin(), dets.end(), cmp);
 80 |         for (size_t keep = 0; keep < dets.size(); ++keep) {
 81 |             auto& item = dets[keep];
 82 |             res.push_back(item);
 83 |             // Remove every lower-ranked box whose IoU with the kept box exceeds nms_thresh.
 84 |             size_t other = keep + 1;
 85 |             while (other < dets.size()) {
 86 |                 if (iou(item.bbox, dets[other].bbox) > nms_thresh) dets.erase(dets.begin() + other);
 87 |                 else ++other;
 88 |             }
 89 |         }
 90 |     }
 91 | }
 92 | // Resizes img to fit inside input_w x input_h while keeping its aspect ratio and centers it on a gray (128) canvas.
 93 | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
 94 |     const float r_w = input_w / (img.cols * 1.0);
 95 |     const float r_h = input_h / (img.rows * 1.0);
 96 |     const bool fit_width = r_h > r_w;
 97 | 
 98 |     // Size of the resized image; the float products are truncated to int.
 99 |     const int w = fit_width ? input_w : (int)(r_h * img.cols);
100 |     const int h = fit_width ? (int)(r_w * img.rows) : input_h;
101 |     // Top-left corner that centers the resized image on the canvas.
102 |     const int x = fit_width ? 0 : (input_w - w) / 2;
103 |     const int y = fit_width ? (input_h - h) / 2 : 0;
104 | 
105 |     cv::Mat re(h, w, CV_8UC3);
106 |     cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
107 | 
108 |     // Gray canvas of the full network input size, with the resized image copied into its region.
109 |     cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
110 |     cv::Mat target = out(cv::Rect(x, y, re.cols, re.rows));
111 |     re.copyTo(target);
112 |     return out;
113 | }
114 | // Runs detection on every image matching param.imgDir + "*." + param.imgType, at most batchsize
115 | // images per inference call, and writes the annotated images to param.imgDir + "output/".
116 | void yolo_img(trt* m_trt, int& batchsize)
117 | {
118 |     vector<cv::Mat> testVal;
119 |     vector<string> imgs;
120 |     string pattern = m_trt->param.imgDir + "*." + m_trt->param.imgType;//"D:/qt_project/tensorrtCV/model/yolo/*.jpg";
121 |     vector<cv::String> images_names;
122 |     cv::glob(pattern, images_names, false);
123 | 
124 |     string outputPath = m_trt->param.imgDir + "output/";
125 | #ifdef WIN32
126 |     if (_access(outputPath.c_str(), 0) == -1)
127 |     {
128 |         _mkdir(outputPath.c_str());
129 |     }
130 | #endif
131 | #ifdef linux
132 |     if (access(outputPath.c_str(), 0) == -1)
133 |     {
134 |         mkdir(outputPath.c_str(), 0777);
135 |     }
136 | #endif
137 |     int inputH = m_trt->param.input_h;
138 |     int inputW = m_trt->param.input_w;
139 |     int flag = (m_trt->param.input_c == 1) ? 0 : 1;
140 |     if (images_names.size() == 0)
141 |     {
142 |         cout << "no " << m_trt->param.imgType << " in " << m_trt->param.imgDir << endl;
143 |         return;
144 |     }
145 |     // Vectors own the host buffers, so they are released on every return path.
146 |     vector<float> input((size_t)batchsize * m_trt->param.input_c * inputH * inputW);
147 |     vector<float> output((size_t)batchsize * m_trt->param.outputSize);
148 |     for (size_t idx = 0; idx < images_names.size(); idx++)
149 |     {
150 |         const cv::String& image_name = images_names[idx];
151 |         cout << image_name << endl;
152 |         cv::Mat Img = cv::imread(image_name, flag);
153 |         if (Img.empty())
154 |         {
155 |             // An unreadable file gives an empty Mat; skip it instead of resizing it.
156 |             cout << "can not read image, skip it : " << image_name << endl;
157 |         }
158 |         else
159 |         {
160 |             testVal.push_back(preprocess_img(Img, inputW, inputH));
161 |             imgs.push_back(image_name);
162 |         }
163 |         // Infer when the batch is full or after the last file, provided something is pending.
164 |         bool lastImage = (idx + 1 == images_names.size());
165 |         if (testVal.empty() || ((int)testVal.size() != batchsize && !lastImage))
166 |         {
167 |             continue;
168 |         }
169 |         int inferBatch = (int)testVal.size();
170 |         cout << testVal.size() << "  " << inferBatch << endl;
171 |         // cv::Size takes (width, height).
172 |         cv::Mat Data = cv::dnn::blobFromImages(testVal, 1.0, cv::Size{ inputW,inputH }, cv::Scalar{ 0 }, true);//BGR -> RGB
173 | 
174 |         memcpy(input.data(), Data.data, inferBatch * m_trt->param.input_c * inputH * inputW * sizeof(float));
175 | 
176 |         m_trt->doInference(input.data(), inferBatch, output.data());
177 | 
178 |         std::vector<std::vector<Yolo::Detection>> batch_res(inferBatch);
179 |         int MAX_OUTPUT_BBOX_COUNT = (m_trt->param.outputSize - 1) / 6;
180 | 
181 |         float CONF_THRESH = 0.5;
182 |         float NMS_THRESH = 0.5;
183 | 
184 |         for (int j = 0; j < inferBatch; j++)
185 |         {
186 |             auto& res = batch_res[j];
187 |             nms(res, output.data() + j * m_trt->param.outputSize, MAX_OUTPUT_BBOX_COUNT, CONF_THRESH, NMS_THRESH);
188 |         }
189 | 
190 |         for (int b = 0; b < inferBatch; b++) {
191 |             auto& res = batch_res[b];
192 |             // Draw on the image as stored on disk; get_rect converts boxes from network-input coordinates.
193 |             cv::Mat img = cv::imread(imgs[b], 1);
194 |             for (size_t j = 0; j < res.size(); j++) {
195 |                 cv::Rect r = get_rect(img, res[j].bbox, inputW, inputH);
196 |                 cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
197 |                 cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0x00, 0x00), 2);
198 |             }
199 |             string outPath = imgs[b].replace(0, m_trt->param.imgDir.size(), outputPath);//"D:/qt_project/tensorrtCV/model/test/1.jpg";
200 |             cv::imwrite(outPath, img);
201 |         }
202 |         imgs.clear();
203 |         testVal.clear();
204 |         batch_res.clear();
205 |     }
206 | }
207 | // Runs detection on every video matching param.imgDir + "*" + param.imgType and writes an annotated
208 | // MJPG .avi with the same base name to param.imgDir + "output/".
209 | void yolo_vedio(trt* m_trt, int& batchsize)
210 | {
211 |     string vedios = m_trt->param.imgDir + "*" + m_trt->param.imgType;//"D:/qt_project/tensorrtCV/model/yolo/test.mp4";
212 |     vector<cv::String> vedios_names;
213 |     cv::glob(vedios, vedios_names, false);
214 |     string outputPath = m_trt->param.imgDir + "output/";
215 | #ifdef WIN32
216 |     if (_access(outputPath.c_str(), 0) == -1)
217 |     {
218 |         _mkdir(outputPath.c_str());
219 |     }
220 | #endif
221 | #ifdef linux
222 |     if (access(outputPath.c_str(), 0) == -1)
223 |     {
224 |         mkdir(outputPath.c_str(), 0777);
225 |     }
226 | #endif
227 |     int inputH = m_trt->param.input_h;
228 |     int inputW = m_trt->param.input_w;
229 |     // Vectors own the host buffers, so they are released on every return path.
230 |     vector<float> input((size_t)batchsize * m_trt->param.input_c * inputH * inputW);
231 |     vector<float> output((size_t)batchsize * m_trt->param.outputSize);
232 |     for (auto vedio_name : vedios_names)
233 |     {
234 |         cv::VideoCapture capture(vedio_name);
235 |         if (!capture.isOpened())
236 |         {
237 |             cout << "can not open vedio file, please check it : " << vedio_name << endl;
238 |             return;
239 |         }
240 |         cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT));
241 |         double fps = capture.get(cv::CAP_PROP_FPS);
242 |         double allFps = capture.get(cv::CAP_PROP_FRAME_COUNT);
243 |         cout << allFps << endl;
244 |         cv::VideoWriter writer;
245 |         string vedioOputPath = ((string)vedio_name).replace(0, m_trt->param.imgDir.size(), outputPath);
246 |         // Swap the last three characters (the extension) for "avi".
247 |         vedioOputPath = vedioOputPath.substr(0, vedioOputPath.size() - 3) + "avi";
248 |         cout << "vedio output Path :" << vedioOputPath << endl;
249 |         writer.open(vedioOputPath, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), fps, size, true);
250 |         if (!writer.isOpened())
251 |         {
252 |             cout << "write vedio error! please check it : " << vedioOputPath << endl;
253 |             return;
254 |         }
255 |         cv::Mat img;
256 |         vector<cv::Mat> testVal;
257 |         vector<cv::Mat> imgs;
258 |         auto start_time = std::chrono::system_clock::now();
259 |         bool moreFrames = true;
260 |         while (moreFrames) {
261 |             moreFrames = capture.read(img);
262 |             if (moreFrames) {
263 |                 // cv::Mat copies share pixel data and read() may reuse img's buffer, so store a deep copy.
264 |                 imgs.push_back(img.clone());
265 |                 testVal.push_back(preprocess_img(img, inputW, inputH));
266 |             }
267 |             // Infer when the batch is full, or when the stream has ended with frames still pending.
268 |             if (testVal.empty() || (moreFrames && (int)testVal.size() != batchsize))
269 |             {
270 |                 continue;
271 |             }
272 |             int inferBatch = (int)testVal.size();
273 |             // cv::Size takes (width, height).
274 |             cv::Mat Data = cv::dnn::blobFromImages(testVal, 1.0, cv::Size{ inputW,inputH }, cv::Scalar{ 0 }, true);
275 |             memcpy(input.data(), Data.data, inferBatch * m_trt->param.input_c * inputH * inputW * sizeof(float));
276 | 
277 |             m_trt->doInference(input.data(), inferBatch, output.data());
278 | 
279 |             std::vector<std::vector<Yolo::Detection>> batch_res(inferBatch);
280 |             int MAX_OUTPUT_BBOX_COUNT = (m_trt->param.outputSize - 1) / 6;
281 | 
282 |             float CONF_THRESH = 0.5;
283 |             float NMS_THRESH = 0.5;
284 | 
285 |             for (int j = 0; j < inferBatch; j++)
286 |             {
287 |                 auto& res = batch_res[j];
288 |                 nms(res, output.data() + j * m_trt->param.outputSize, MAX_OUTPUT_BBOX_COUNT, CONF_THRESH, NMS_THRESH);
289 |             }
290 | 
291 |             for (int b = 0; b < inferBatch; b++) {
292 |                 auto& res = batch_res[b];
293 |                 cv::Mat writeImg = imgs[b];
294 |                 for (size_t j = 0; j < res.size(); j++) {
295 |                     cv::Rect r = get_rect(writeImg, res[j].bbox, inputW, inputH);
296 |                     cv::rectangle(writeImg, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
297 |                     cv::putText(writeImg, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0x00, 0x00), 2);
298 |                 }
299 |                 writer.write(writeImg);
300 |             }
301 | 
302 |             imgs.clear();
303 |             testVal.clear();
304 |         }
305 |         capture.release();
306 |         writer.release();
307 |         auto end_time = std::chrono::system_clock::now();
308 |         float duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
309 |         cout << "all time : " << duration << endl;
310 |     }
311 | }
311 | // Returns true when `ext` is exactly one of the space-separated entries of `supported`.
312 | // An empty `ext`, or one that itself contains a space, never matches.
313 | static bool isListedType(const string& supported, const string& ext)
314 | {
315 |     if (ext.empty() || ext.find(' ') != string::npos)
316 |         return false;
317 |     const string padded = " " + supported + " ";
318 |     return padded.find(" " + ext + " ") != string::npos;
319 | }
320 | 
321 | // Entry point: constructs the trt object from a JSON description, calls createENG() and
322 | // inference_init() as the loaded parameters request, then dispatches to yolo_img or
323 | // yolo_vedio according to the configured imgType.
324 | // An optional first command-line argument replaces the built-in JSON path.
325 | // Returns 0 on success and 1 when imgType is not one of the supported types.
326 | int main(int argc, char** argv)
327 | {
328 |     string JsonPath = "D:/qt_project/tensorrtF/model/yolo/yolov5s.json";
329 |     if (argc > 1)
330 |         JsonPath = argv[1];
331 |     trt* m_trt = new trt(JsonPath);
332 |     if (m_trt->param.createENG)
333 |         m_trt->createENG();
334 |     if (!m_trt->param.doInfer)
335 |         return 0;
336 |     int batchsize = m_trt->param.BatchSize;
337 |     int outputSize = m_trt->param.outputSize;
338 |     m_trt->inference_init(batchsize, outputSize);
339 |     // Match the configured type as a whole token, so an empty or partial value
340 |     // selects neither pipeline instead of both.
341 |     if (isListedType("jpg png bmp jpeg", m_trt->param.imgType))
342 |         yolo_img(m_trt, batchsize);
343 |     else if (isListedType("mp4 avi", m_trt->param.imgType))
344 |         yolo_vedio(m_trt, batchsize);
345 |     else
346 |     {
347 |         cout << "unsupported imgType : " << m_trt->param.imgType << endl;
348 |         return 1;
349 |     }
350 |     return 0;
351 | }
352 | 


--------------------------------------------------------------------------------
/getWeights.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sat May 29 19:12:58 2021
 4 | 
 5 | @author: Administrator
 6 | """
 7 | import xml.etree.ElementTree as ET
 8 | import os
 9 | from PIL import Image
10 | import numpy as np
11 | import torch
12 | 
13 | from PIL import Image
14 | import torch
15 | import torchvision
16 | import struct
17 | 
18 | 
19 | 
20 | 
21 | path = "H:/myGitHub/tensorrtF/model/yolov5/"
22 | Path = os.path.join(path, "wts")
23 | if not os.path.isdir(Path):
24 |     os.makedirs(Path)
25 | def getweights(model_path):
26 |     state_dict = torch.load(model_path,map_location= lambda storage,loc :storage)
27 |     print(state_dict )
28 |     keys = [v for key,v in enumerate(state_dict)]
29 |     print(keys)
30 |     with open(os.path.join(Path,"network.txt"),'w') as fw:
31 |         for key in keys:
32 |             print("~~~~~~~~~~~ ",key)
33 |             ts = state_dict[key]
34 |             shape = ts.shape
35 |             size = shape
36 |             allsize = 1
37 |             fw.write(key + " ")
38 |             for idx in range(len(size)):
39 |                 allsize *= size[idx]
40 |                 fw.write(str(size[idx])+ " ")
41 |             fw.write('\n')
42 |             ts = ts.reshape(allsize)
43 |             with open(Path + '/'+ key + '.wgt','wb') as f:
44 |                 a = struct.pack('i',allsize)
45 |                 f.write(a)
46 |                 for i in range(allsize):
47 |                     a = struct.pack('f',ts[i])#.hex()
48 |                     f.write(a)
49 |                             
50 |                             
51 |                     
52 | if __name__ == '__main__':
53 | 
54 |     model = torch.load(path+'yolov5s.pt')['model'].float() 
55 |     torch.save(model.state_dict(),path+'yolov5s.pth')
56 |     getweights(path + "yolov5s.pth")
57 |     #model = torchvision.models.resnet50()
58 |     #model.eval()
59 |     #torch.save(model.state_dict(),r"H:\myGitHub\tensorrtF\model\resnet50\res50.pth")
60 |     #a = torch.randn(1,3,256,256).type(torch.float32)
61 |     #torch.onnx.export(model, a,r"H:\myGitHub\tensorrtF\model\resnet50\res50.onnx",training=2 )


--------------------------------------------------------------------------------
/model/ddrnet.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "BatchSize": 1,
  3 |   "maxBatchsize":1,
  4 |   "indim":3,
  5 |   "input_c": 3,
  6 |   "input_h": 512,
  7 |   "input_w": 1024,
  8 |   "ENGPath": "/mnt/f/LearningCode/ddrnet/ddrnetwsl.eng",
  9 |   "weightsDir": "/mnt/f/LearningCode/ddrnet/wts/",
 10 |   "int8": false,
 11 |   "fp16": true,
 12 |   "div_255": true,
 13 |   "Mean": [ 0.485, 0.456, 0.406 ],
 14 |   "Std": [ 0.229, 0.224, 0.225 ],
 15 |   "inputBlobName": "data",
 16 |   "outputBlobName": "output",
 17 |   "outputSize": 4194304,
 18 |   "createENG": true,
 19 |   "doInfer": false,
 20 |   "imgDir": "/mnt/f/dataset/oneimg/",
 21 |   "imgType": "png",
 22 |   "network": [
 23 |     {
 24 |       "layerStyle": "cba",
 25 |       "layerName": "relu_16",
 26 |       "inputName": "data",
 27 |       "convFile": "conv1.0",
 28 |       "biasFile":"conv1.0",
 29 |       "parameter": {
 30 |         "input_c": 3,
 31 |         "output_c": 32,
 32 |         "kernel": [ 3, 3 ],
 33 |         "padding": [ 1, 1 ],
 34 |         "stride": [ 2, 2 ]
 35 |       },
 36 |       "withBN": true,
 37 |       "bnFile": "conv1.1",
 38 |       "active_type": "relu",
 39 |       "eps": 1e-5
 40 |     },
 41 |     {
 42 |       "layerStyle": "cba",
 43 |       "layerName": "relu_19",
 44 |       "inputName": "relu_16",
 45 |       "convFile": "conv1.3",
 46 |        "biasFile":"conv1.3",
 47 |       "parameter": {
 48 |         "input_c": 32,
 49 |         "output_c": 32,
 50 |         "kernel": [ 3, 3 ],
 51 |         "padding": [ 1, 1 ],
 52 |         "stride": [ 2, 2 ]
 53 |       },
 54 |       "withBN": true,
 55 |       "bnFile": "conv1.4",
 56 |       "active_type": "relu",
 57 |       "eps": 1e-5
 58 |     },
 59 |     {
 60 |       "layerStyle": "resnet",
 61 |       "layerName": "relu_26",
 62 |       "inputName": "relu_19",
 63 |       "right":[
 64 |                [  ["layer1.0.conv1", 32, 3, 1, 1],
 65 |                   ["layer1.0.bn1"],
 66 |                   ["relu"]
 67 |                ],
 68 |                [
 69 |                 ["layer1.0.conv2", 32, 3, 1, 1],
 70 |                 ["layer1.0.bn2"]
 71 |                ]
 72 |           ],
 73 |       "active_type": "relu"
 74 |     },
 75 |     {       
 76 |       "layerStyle": "resnet",
 77 |       "layerName": "relu_33",
 78 |       "inputName": "relu_26",
 79 |       "right":[
 80 |                [  
 81 |                   ["layer1.1.conv1", 32, 3, 1, 1],
 82 |                   ["layer1.1.bn1"],
 83 |                   ["relu"]
 84 |                ],
 85 |                [
 86 |                   ["layer1.1.conv2", 32, 3, 1, 1],
 87 |                   ["layer1.1.bn2"]
 88 |                ]
 89 |           ],
 90 |       "active_type": "relu"
 91 |     },
 92 |     {
 93 |       "layerStyle": "resnet",
 94 |       "layerName": "relu_42",
 95 |       "inputName": "relu_33",
 96 |       "left":[
 97 |                [
 98 |                   ["layer2.0.conv1", 64, 3, 2, 1],
 99 |                   ["layer2.0.bn1"],
100 |                   ["relu"]
101 |                ],
102 |                [
103 |                   ["layer2.0.conv2", 64, 3, 1, 1],
104 |                   ["layer2.0.bn2"]
105 |                ]
106 |           ],
107 |        "right":[
108 |                    [  
109 |                       ["layer2.0.downsample.0", 64, 1, 2, 0],
110 |                       ["layer2.0.downsample.1"]
111 |                    ]
112 |           ],
113 |       "active_type": "relu"
114 |     },
115 |     {
116 |       "layerStyle": "resnet",
117 |       "layerName": "add_48",
118 |       "inputName": "relu_42",
119 |       "right":[
120 |                [  
121 |                   ["layer2.1.conv1", 64, 3, 1, 1],
122 |                   ["layer2.1.bn1"],
123 |                   ["relu"]
124 |                ],
125 |                [
126 |                   ["layer2.1.conv2", 64, 3, 1, 1],
127 |                   ["layer2.1.bn2"]
128 |                ]
129 |           ]
130 |     },
131 |     {
132 |       "layerStyle": "active",
133 |       "layerName": "relu_65",
134 |       "active_type": "relu",
135 |       "inputName": "add_48"
136 |     },
137 |     {
138 |       "layerStyle": "resnet",
139 |       "layerName": "relu_72",
140 |       "inputName": "relu_65",
141 |       "right":[
142 |                [  
143 |                   ["layer3_.0.conv1", 64, 3, 1, 1],
144 |                   ["layer3_.0.bn1"],
145 |                   ["relu"]
146 |                ],
147 |                [
148 |                   ["layer3_.0.conv2", 64, 3, 1, 1],
149 |                   ["layer3_.0.bn2"]
150 |                ]
151 |           ],
152 |         "active_type": "relu"
153 |     },
154 |     { 
155 |       "layerStyle": "resnet",
156 |       "layerName": "add_78",
157 |       "inputName": "relu_72",
158 |       "right":[
159 |                [  
160 |                   ["layer3_.1.conv1", 64, 3, 1, 1],
161 |                   ["layer3_.1.bn1"],
162 |                   ["relu"]
163 |                ],
164 |                [
165 |                   ["layer3_.1.conv2", 64, 3, 1, 1],
166 |                   ["layer3_.1.bn2"]
167 |                ]
168 |           ]
169 |     },
170 |     {
171 |       "layerStyle": "active",
172 |       "layerName": "relu_79",
173 |       "active_type": "relu",
174 |       "inputName": "add_78"
175 |     },
176 |     {
177 |       "layerStyle": "active",
178 |       "layerName": "relu_49",
179 |       "active_type": "relu",
180 |       "inputName": "add_48"
181 |     },
182 |     {
183 |       "layerStyle": "resnet",
184 |       "layerName": "relu_58",
185 |       "inputName": "relu_49",
186 |       "left":[
187 |                [
188 |                   ["layer3.0.conv1", 128, 3, 2, 1],
189 |                   ["layer3.0.bn1"],
190 |                   ["relu"]
191 |                ],
192 |                [
193 |                   ["layer3.0.conv2", 128, 3, 1, 1],
194 |                   ["layer3.0.bn2"]
195 |                ]
196 |           ],
197 |        "right":[
198 |                    [  
199 |                       ["layer3.0.downsample.0", 128, 1, 2, 0],
200 |                       ["layer3.0.downsample.1"]
201 |                    ]
202 |           ],
203 |       "active_type": "relu"
204 |     },
205 |     {
206 |       "layerStyle": "resnet",
207 |       "layerName": "add_64",
208 |       "inputName": "relu_58",
209 |       "right":[
210 |                [  
211 |                   ["layer3.1.conv1", 128, 3, 1, 1],
212 |                   ["layer3.1.bn1"],
213 |                   ["relu"]
214 |                ],
215 |                [
216 |                   ["layer3.1.conv2", 128, 3, 1, 1],
217 |                   ["layer3.1.bn2"]
218 |                ]
219 |           ]
220 |     },
221 |     {
222 |       "layerStyle": "cba",
223 |       "layerName": "BatchNormalization_81",
224 |       "inputName": "relu_79",
225 |       "convFile": "down3.0",
226 |       "parameter": {
227 |         "input_c": 64,
228 |         "output_c": 128,
229 |         "kernel": [ 3, 3 ],
230 |         "padding": [ 1, 1 ],
231 |         "stride": [ 2, 2 ]
232 |       },
233 |       "withBN": true,
234 |       "bnFile": "down3.1"
235 |     },
236 |     {
237 |       "layerStyle": "eltwise",
238 |       "layerName": "add_82",
239 |       "inputName": [ "add_64", "BatchNormalization_81" ],
240 |       "eltType": "kSUM"
241 |     },
242 |     {
243 |       "layerStyle": "active",
244 |       "layerName": "relu_83",
245 |       "active_type": "relu",
246 |       "inputName": "add_64"
247 |     },
248 |     {            
249 |       "layerStyle": "cba",
250 |       "layerName": "BatchNormalization_85",
251 |       "inputName": "relu_83",
252 |       "convFile": "compression3.0",
253 |       "parameter": {
254 |         "input_c": 128,
255 |         "output_c": 64,
256 |         "kernel": [ 1, 1 ],
257 |         "padding": [ 0, 0 ],
258 |         "stride": [ 1, 1 ]
259 |       },
260 |       "withBN": true,
261 |       "bnFile": "compression3.1"
262 |     },
263 |     { 
264 |       "layerStyle": "upsample",
265 |       "layerName": "Upsample_96",
266 |       "inputName": "BatchNormalization_85",
267 |       "mode": "linear",
268 |       "grid" : [64, 128]
269 |     },
270 |     {
271 |       "layerStyle": "eltwise",
272 |       "layerName": "add_97",
273 |       "inputName": [ "add_78", "Upsample_96" ],
274 |       "eltType": "kSUM"
275 |     },
276 |     {
277 |       "layerStyle": "active",
278 |       "layerName": "relu_114",
279 |       "active_type": "relu",
280 |       "inputName": "add_97"
281 |     },
282 |     {
283 |       "layerStyle": "active",
284 |       "layerName": "relu_98",
285 |       "active_type": "relu",
286 |       "inputName": "add_82"
287 |     },
288 |     {
289 |       "layerStyle": "resnet",
290 |       "layerName": "relu_107",
291 |       "inputName": "relu_98",
292 |       "left":[
293 |                [
294 |                   ["layer4.0.conv1", 256, 3, 2, 1],
295 |                   ["layer4.0.bn1"],
296 |                   ["relu"]
297 |                ],
298 |                [
299 |                   ["layer4.0.conv2", 256, 3, 1, 1],
300 |                   ["layer4.0.bn2"]
301 |                ]
302 |           ],
303 |        "right":[
304 |                    [  
305 |                       ["layer4.0.downsample.0", 256, 1, 2, 0],
306 |                       ["layer4.0.downsample.1"]
307 |                    ]
308 |           ],
309 |       "active_type": "relu"
310 |     },
311 |     {
312 |       "layerStyle": "resnet",
313 |       "layerName": "add_113",
314 |       "inputName": "relu_107",
315 |       "right":[
316 |                [  
317 |                   ["layer4.1.conv1", 256, 3, 1, 1],
318 |                   ["layer4.1.bn1"],
319 |                   ["relu"]
320 |                ],
321 |                [
322 |                   ["layer4.1.conv2", 256, 3, 1, 1],
323 |                   ["layer4.1.bn2"]
324 |                ]
325 |           ]
326 |     },
327 |     {
328 |       "layerStyle": "active",
329 |       "layerName": "relu_135",
330 |       "active_type": "relu",
331 |       "inputName": "add_113"
332 |     },
333 |     {
334 |       "layerStyle": "cba",
335 |       "layerName": "BatchNormalization_137",
336 |       "inputName": "relu_135",
337 |       "convFile": "compression4.0",
338 |       "parameter": {
339 |         "input_c": 256,
340 |         "output_c": 64,
341 |         "kernel": [ 1, 1 ],
342 |         "padding": [ 0, 0 ],
343 |         "stride": [ 1, 1 ]
344 |       },
345 |       "withBN": true,
346 |       "bnFile": "compression4.1"
347 |     },
348 |     {
349 |       "layerStyle": "resnet",
350 |       "layerName": "relu_121",
351 |       "inputName": "relu_114",
352 |       "right":[
353 |                [  
354 |                   ["layer4_.0.conv1", 64, 3, 1, 1],
355 |                   ["layer4_.0.bn1"],
356 |                   ["relu"]
357 |                ],
358 |                [
359 |                   ["layer4_.0.conv2", 64, 3, 1, 1],
360 |                   ["layer4_.0.bn2"]
361 |                ]
362 |           ],
363 |       "active_type": "relu"
364 |     },
365 |     {
366 |       "layerStyle": "resnet",
367 |       "layerName": "add_127",
368 |       "inputName": "relu_121",
369 |       "right":[
370 |                [  
371 |                   ["layer4_.1.conv1", 64, 3, 1, 1],
372 |                   ["layer4_.1.bn1"],
373 |                   ["relu"]
374 |                ],
375 |                [
376 |                   ["layer4_.1.conv2", 64, 3, 1, 1],
377 |                   ["layer4_.1.bn2"]
378 |                ]
379 |           ]
380 |     },
381 |     {
382 |       "layerStyle": "upsample",
383 |       "layerName": "Upsample_148",
384 |       "inputName": "BatchNormalization_137",
385 |       "mode": "linear",
386 |       "grid" : [64, 128]
387 |     },
388 |     {
389 |       "layerStyle": "eltwise",
390 |       "layerName": "add_149",
391 |       "inputName": [ "add_127", "Upsample_148" ],
392 |       "eltType": "kSUM"
393 |     },
394 |     {
395 |       "layerStyle": "active",
396 |       "layerName": "relu_150",
397 |       "active_type": "relu",
398 |       "inputName": "add_149"
399 |     },
400 |     {
401 |       "layerStyle": "resnet",
402 |       "layerName": "add_161",
403 |       "inputName": "relu_150",
404 |       "left":[
405 |                [
406 |                   ["layer5_.0.conv1", 64, 1, 1, 0],
407 |                   ["layer5_.0.bn1"],
408 |                   ["relu"]
409 |                ],
410 |                [
411 |                   ["layer5_.0.conv2", 64, 3, 1, 1],
412 |                   ["layer5_.0.bn2"],
413 |                   ["relu"]
414 |                ],
415 |                [
416 |                   ["layer5_.0.conv3", 128, 1, 1, 0],
417 |                   ["layer5_.0.bn3"]
418 |                ]
419 |           ],
420 |        "right":[
421 |                    [  
422 |                       ["layer5_.0.downsample.0", 128, 1, 1, 0],
423 |                       ["layer5_.0.downsample.1"]
424 |                    ]
425 |           ]
426 |     },
427 |     {
428 |       "layerStyle": "active",
429 |       "layerName": "relu_128",
430 |       "active_type": "relu",
431 |       "inputName": "add_127"
432 |     },
433 |     {
434 |       "layerStyle": "cba",
435 |       "layerName": "relu_131",
436 |       "inputName": "relu_128",
437 |       "convFile": "down4.0",
438 |       "parameter": {
439 |         "input_c": 64,
440 |         "output_c": 128,
441 |         "kernel": [ 3, 3 ],
442 |         "padding": [ 1, 1 ],
443 |         "stride": [ 2, 2 ]
444 |       },
445 |       "withBN": true,
446 |       "bnFile": "down4.1",
447 |       "active_type": "relu",
448 |       "eps": 1e-5
449 |     },
450 |     {      
451 |       "layerStyle": "cba",
452 |       "layerName": "BatchNormalization_133",
453 |       "inputName": "relu_131",
454 |       "convFile": "down4.3",
455 |       "parameter": {
456 |         "input_c": 128,
457 |         "output_c": 256,
458 |         "kernel": [ 3, 3 ],
459 |         "padding": [ 1, 1 ],
460 |         "stride": [ 2, 2 ]
461 |       },
462 |       "withBN": true,
463 |       "bnFile": "down4.4"
464 |     },
465 |     { 
466 |       "layerStyle": "eltwise",
467 |       "layerName": "add_134",
468 |       "inputName": [ "add_113", "BatchNormalization_133" ],
469 |       "eltType": "kSUM"
470 |     },
471 |     {
472 |       "layerStyle": "active",
473 |       "layerName": "relu_162",
474 |       "active_type": "relu",
475 |       "inputName": "add_134"
476 |     },
477 |     {
478 |       "layerStyle": "resnet",
479 |       "layerName": "add_173",
480 |       "inputName": "relu_162",
481 |       "left":[
482 |                [
483 |                   ["layer5.0.conv1", 256, 1, 1, 0],
484 |                   ["layer5.0.bn1"],
485 |                   ["relu"]
486 |                ],
487 |                [
488 |                   ["layer5.0.conv2", 256, 3, 2, 1],
489 |                   ["layer5.0.bn2"],
490 |                   ["relu"]
491 |                ],
492 |                [
493 |                   ["layer5.0.conv3", 512, 1, 1, 0],
494 |                   ["layer5.0.bn3"]
495 |                ]
496 |           ],
497 |        "right":[
498 |                    [  
499 |                       ["layer5.0.downsample.0", 512, 1, 2, 0],
500 |                       ["layer5.0.downsample.1"]
501 |                    ]
502 |           ]
503 |     },
504 |     {
505 |       "layerStyle": "bn",
506 |       "layerName": "BatchNormalization_180",
507 |       "inputName": "add_173",
508 |       "weightFile": "spp.scale0.0"
509 |     },
510 |     {
511 |       "layerStyle": "active",
512 |       "layerName": "relu_181",
513 |       "active_type": "relu",
514 |       "inputName": "BatchNormalization_180"
515 |     },
516 |     {
517 |       "layerStyle": "conv",
518 |       "layerName": "conv_182",
519 |       "inputName": "relu_181",
520 |       "weightFile": "spp.scale0.2",
521 |        "parameter": {
522 |         "input_c": 512,
523 |         "output_c": 128,
524 |         "kernel": [ 1, 1 ],
525 |         "padding": [ 0, 0 ],
526 |         "stride": [ 1, 1 ]
527 |       }
528 |     },
529 |     {
530 |       "layerStyle": "pool",
531 |       "layerName": "AveragePool_184",
532 |       "inputName": "add_173",
533 |       "parameter": {
534 |         "poolType": "kAVG",
535 |         "kernel": [ 5, 5 ],
536 |         "padding": [ 2, 2 ],
537 |         "stride": [ 2, 2 ]
538 |       }
539 |     },
540 |     {
541 |       "layerStyle": "bn",
542 |       "layerName": "BatchNormalization_185",
543 |       "inputName": "AveragePool_184",
544 |       "weightFile": "spp.scale1.1"
545 |     },
546 |     {
547 | 
548 |       "layerStyle": "active",
549 |       "layerName": "relu_186",
550 |       "active_type": "relu",
551 |       "inputName": "BatchNormalization_185"
552 |     },
553 |     {
554 |       "layerStyle": "conv",
555 |       "layerName": "conv_187",
556 |       "inputName": "relu_186",
557 |       "weightFile": "spp.scale1.3",
558 |        "parameter": {
559 |         "input_c": 512,
560 |         "output_c": 128,
561 |         "kernel": [ 1, 1 ],
562 |         "padding": [ 0, 0 ],
563 |         "stride": [ 1, 1 ]
564 |       }
565 |     },
566 |     {
567 |       "layerStyle": "upsample",
568 |       "layerName": "Upsample_198",
569 |       "inputName": "conv_187",
570 |       "mode": "linear",
571 |       "grid" : [8, 16]
572 |     },
573 |     {
574 |       "layerStyle": "eltwise",
575 |       "layerName": "add_199",
576 |       "inputName": [ "conv_182", "Upsample_198" ],
577 |       "eltType": "kSUM"
578 |     },
579 |      {
580 |       "layerStyle": "pool",
581 |       "layerName": "AveragePool_204",
582 |       "inputName": "add_173",
583 |       "parameter": {
584 |         "poolType": "kAVG",
585 |         "kernel": [ 9, 9 ],
586 |         "padding": [ 4, 4 ],
587 |         "stride": [ 4, 4 ]
588 |       }
589 |     },
590 |     {
591 |       "layerStyle": "bn",
592 |       "layerName": "BatchNormalization_205",
593 |       "inputName": "AveragePool_204",
594 |       "weightFile": "spp.scale2.1"
595 |     },
596 |     {
597 |       "layerStyle": "active",
598 |       "layerName": "relu_206",
599 |       "active_type": "relu",
600 |       "inputName": "BatchNormalization_205"
601 |     },
602 |     {
603 |       "layerStyle": "conv",
604 |       "layerName": "conv_207",
605 |       "inputName": "relu_206",
606 |       "weightFile": "spp.scale2.3",
607 |        "parameter": {
608 |         "input_c": 512,
609 |         "output_c": 128,
610 |         "kernel": [ 1, 1 ],
611 |         "padding": [ 0, 0 ],
612 |         "stride": [ 1, 1 ]
613 |       }
614 |     },
615 |     {
616 |       "layerStyle": "upsample",
617 |       "layerName": "Upsample_218",
618 |       "inputName": "conv_207",
619 |       "mode": "linear",
620 |       "grid" : [8, 16]
621 |     },
622 |     {
623 |       "layerStyle": "pool",
624 |       "layerName": "AveragePool_224",
625 |       "inputName": "add_173",
626 |       "parameter": {
627 |         "poolType": "kAVG",
628 |         "kernel": [ 17, 17],
629 |         "padding": [ 8, 8 ],
630 |         "stride": [ 8, 8 ]
631 |       }
632 |     },
633 |     {
634 | 
635 |       "layerStyle": "bn",
636 |       "layerName": "BatchNormalization_225",
637 |       "inputName": "AveragePool_224",
638 |       "weightFile": "spp.scale3.1"
639 |     },
640 |     {
641 |       "layerStyle": "active",
642 |       "layerName": "relu_226",
643 |       "active_type": "relu",
644 |       "inputName": "BatchNormalization_225"
645 |     },
646 |     {
647 |       "layerStyle": "conv",
648 |       "layerName": "conv_227",
649 |       "inputName": "relu_226",
650 |       "weightFile": "spp.scale3.3",
651 |        "parameter": {
652 |         "input_c": 512,
653 |         "output_c": 128,
654 |         "kernel": [ 1, 1 ],
655 |         "padding": [ 0, 0 ],
656 |         "stride": [ 1, 1 ]
657 |       }
658 |     },
659 |     {
660 |       "layerStyle": "upsample",
661 |       "layerName": "Upsample_238",
662 |       "inputName": "conv_227",
663 |       "mode": "linear",
664 |       "grid" : [8, 16]
665 |     },
666 |     {
667 |       "layerStyle": "reduce",
668 |       "layerName": "GlobalAveragePool_243",
669 |       "inputName": "add_173",
670 |       "axes": 6,
671 |       "keepD": true
672 |     },
673 |     {
674 |       "layerStyle": "bn",
675 |       "layerName": "BatchNormalization_244",
676 |       "inputName": "GlobalAveragePool_243",
677 |       "weightFile": "spp.scale4.1"
678 |     },
679 |     {
680 |       "layerStyle": "active",
681 |       "layerName": "relu_245",
682 |       "active_type": "relu",
683 |       "inputName": "BatchNormalization_244"
684 |     },
685 |     {
686 |       "layerStyle": "conv",
687 |       "layerName": "conv_246",
688 |       "inputName": "relu_245",
689 |       "weightFile": "spp.scale4.3",
690 |        "parameter": {
691 |         "input_c": 512,
692 |         "output_c": 128,
693 |         "kernel": [ 1, 1 ],
694 |         "padding": [ 0, 0 ],
695 |         "stride": [ 1, 1 ]
696 |       }
697 |     },
698 |     {
699 |       "layerStyle": "upsample",
700 |       "layerName": "Upsample_257",
701 |       "inputName": "conv_246",
702 |       "mode": "linear",
703 |       "grid" : [8, 16]
704 |     },
705 |     {
706 |       "layerStyle": "bn",
707 |       "layerName": "BatchNormalization_200",
708 |       "inputName": "add_199",
709 |       "weightFile": "spp.process1.0"
710 |     },
711 |     {
712 |       "layerStyle": "active",
713 |       "layerName": "relu_201",
714 |       "active_type": "relu",
715 |       "inputName": "BatchNormalization_200"
716 |     },
717 |     {
718 |       "layerStyle": "conv",
719 |       "layerName": "conv_202",
720 |       "inputName": "relu_201",
721 |       "weightFile": "spp.process1.2",
722 |        "parameter": {
723 |         "input_c": 128,
724 |         "output_c": 128,
725 |         "kernel": [ 3, 3 ],
726 |         "padding": [ 1, 1 ],
727 |         "stride": [ 1, 1 ]
728 |       }
729 |     },
730 |     {
731 |       "layerStyle": "eltwise",
732 |       "layerName": "add_219",
733 |       "inputName": [ "conv_202", "Upsample_218" ],
734 |       "eltType": "kSUM"
735 |     },
736 |     {
737 |       "layerStyle": "bn",
738 |       "layerName": "BatchNormalization_220",
739 |       "inputName": "add_219",
740 |       "weightFile": "spp.process2.0"
741 |     },
742 |     {
743 |       "layerStyle": "active",
744 |       "layerName": "relu_221",
745 |       "active_type": "relu",
746 |       "inputName": "BatchNormalization_220"
747 |     },
748 |     {
749 |       "layerStyle": "conv",
750 |       "layerName": "conv_222",
751 |       "inputName": "relu_221",
752 |       "weightFile": "spp.process2.2",
753 |        "parameter": {
754 |         "input_c": 128,
755 |         "output_c": 128,
756 |         "kernel": [ 3, 3 ],
757 |         "padding": [ 1, 1 ],
758 |         "stride": [ 1, 1 ]
759 |       }
760 |     },
761 |     {
762 |       "layerStyle": "eltwise",
763 |       "layerName": "add_239",
764 |       "inputName": [ "conv_222", "Upsample_238" ],
765 |       "eltType": "kSUM"
766 |     },
767 |      {
768 |       "layerStyle": "bn",
769 |       "layerName": "BatchNormalization_240",
770 |       "inputName": "add_239",
771 |       "weightFile": "spp.process3.0"
772 |     },
773 |     {
774 |       "layerStyle": "active",
775 |       "layerName": "relu_241",
776 |       "active_type": "relu",
777 |       "inputName": "BatchNormalization_240"
778 |     },
779 |     {
780 |       "layerStyle": "conv",
781 |       "layerName": "conv_242",
782 |       "inputName": "relu_241",
783 |       "weightFile": "spp.process3.2",
784 |        "parameter": {
785 |         "input_c": 128,
786 |         "output_c": 128,
787 |         "kernel": [ 3, 3 ],
788 |         "padding": [ 1, 1 ],
789 |         "stride": [ 1, 1 ]
790 |       }
791 |     },
792 |     {
793 |       "layerStyle": "eltwise",
794 |       "layerName": "add_258",
795 |       "inputName": [ "conv_242", "Upsample_257" ],
796 |       "eltType": "kSUM"
797 |     },
798 |     {
799 |       "layerStyle": "bn",
800 |       "layerName": "BatchNormalization_259",
801 |       "inputName": "add_258",
802 |       "weightFile": "spp.process4.0"
803 |     },
804 |     {
805 |       "layerStyle": "active",
806 |       "layerName": "relu_260",
807 |       "active_type": "relu",
808 |       "inputName": "BatchNormalization_259"
809 |     },
810 |     {
811 |       "layerStyle": "conv",
812 |       "layerName": "conv_261",
813 |       "inputName": "relu_260",
814 |       "weightFile": "spp.process4.2",
815 |        "parameter": {
816 |         "input_c": 128,
817 |         "output_c": 128,
818 |         "kernel": [ 3, 3 ],
819 |         "padding": [ 1, 1 ],
820 |         "stride": [ 1, 1 ]
821 |       }
822 |     },
823 |     {
824 |       "layerStyle": "concat",
825 |       "layerName": "Concat_262",
826 |       "axis": 0,
827 |       "inputName": [ "conv_182", "conv_202", "conv_222", "conv_242", "conv_261" ]
828 |     },
829 |     {
830 |       
831 |       "layerStyle": "bn",
832 |       "layerName": "BatchNormalization_263",
833 |       "inputName": "Concat_262",
834 |       "weightFile": "spp.compression.0"
835 |     },
836 |     {
837 |       "layerStyle": "active",
838 |       "layerName": "relu_264",
839 |       "active_type": "relu",
840 |       "inputName": "BatchNormalization_263"
841 |     },
842 |     {
843 |       "layerStyle": "conv",
844 |       "layerName": "conv_265",
845 |       "inputName": "relu_264",
846 |       "weightFile": "spp.compression.2",
847 |        "parameter": {
848 |         "input_c": 640,
849 |         "output_c": 128,
850 |         "kernel": [ 1, 1 ],
851 |         "padding": [ 0, 0 ],
852 |         "stride": [ 1, 1 ]
853 |       }
854 |     },
855 |     {
856 |       "layerStyle": "bn",
857 |       "layerName": "BatchNormalization_266",
858 |       "inputName": "add_173",
859 |       "weightFile": "spp.shortcut.0"
860 |     },
861 |     {
862 |       "layerStyle": "active",
863 |       "layerName": "relu_267",
864 |       "active_type": "relu",
865 |       "inputName": "BatchNormalization_266"
866 |     },
867 |     {
868 |       "layerStyle": "conv",
869 |       "layerName": "conv_268",
870 |       "inputName": "relu_267",
871 |       "weightFile": "spp.shortcut.2",
872 |        "parameter": {
873 |         "input_c": 512,
874 |         "output_c": 128,
875 |         "kernel": [ 1, 1 ],
876 |         "padding": [ 0, 0 ],
877 |         "stride": [ 1, 1 ]
878 |       }
879 |     },
880 |     {
881 |       "layerStyle": "eltwise",
882 |       "layerName": "add_269",
883 |       "inputName": [ "conv_265", "conv_268" ],
884 |       "eltType": "kSUM"
885 |     },
886 |     {
887 |       "layerStyle": "upsample",
888 |       "layerName": "Upsample_280",
889 |       "inputName": "add_269",
890 |       "mode": "linear",
891 |       "grid" : [64, 128]
892 |     },
893 |     {
894 |       "layerStyle": "eltwise",
895 |       "layerName": "add_281",
896 |       "inputName": [ "add_161", "Upsample_280" ],
897 |       "eltType": "kSUM"
898 |     },
899 |     {
900 |       "layerStyle": "bn",
901 |       "layerName": "BatchNormalization_282",
902 |       "inputName": "add_281",
903 |       "weightFile": "final_layer.bn1"
904 |     },
905 |     {
906 |       "layerStyle": "active",
907 |       "layerName": "relu_283",
908 |       "active_type": "relu",
909 |       "inputName": "BatchNormalization_282"
910 |     },
911 |     {
912 |       "layerStyle": "conv",
913 |       "layerName": "conv_284",
914 |       "inputName": "relu_283",
915 |       "weightFile": "final_layer.conv1",
916 |        "parameter": {
917 |         "input_c": 128,
918 |         "output_c": 64,
919 |         "kernel": [ 3, 3 ],
920 |         "padding": [ 1, 1 ],
921 |         "stride": [ 1, 1 ]
922 |       }
923 |     },
924 |     {
925 |       "layerStyle": "bn",
926 |       "layerName": "BatchNormalization_285",
927 |       "inputName": "conv_284",
928 |       "weightFile": "final_layer.bn2"
929 |     },
930 |     {
931 |       "layerStyle": "active",
932 |       "layerName": "relu_286",
933 |       "active_type": "relu",
934 |       "inputName": "BatchNormalization_285"
935 |     },
936 |     {
937 |       "layerStyle": "conv",
938 |       "layerName": "conv_287",
939 |       "inputName": "relu_286",
940 |       "weightFile": "final_layer.conv2",
941 |        "parameter": {
942 |         "input_c": 64,
943 |         "output_c": 19,
944 |         "kernel": [ 1, 1 ],
945 |         "padding": [ 0, 0 ],
946 |         "stride": [ 1, 1 ]
947 |       }
948 |     },
949 |     {
950 |       "layerStyle": "upsample",
951 |       "layerName": "Upsample_final",
952 |       "inputName": "conv_287",
953 |       "mode": "linear",
954 |       "grid" : [512, 1024]
955 |     },
956 |     {
957 |                 "outputName": "output", 
958 |       "layerStyle": "topk",
959 |       "layerName": "top_max",
960 |       "inputName": "Upsample_final",
961 |       "TopKOperation": "kMAX",
962 |       "k": 1,
963 |       "reduceAxes": 1,
964 |       "outputIndex": 1
965 |     }
966 |   ]
967 | 
968 | }
969 | 


--------------------------------------------------------------------------------
/model/ddrnet/ddrnet.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "BatchSize": 1,
  3 |   "maxBatchsize":1,
  4 |   "indim":3,
  5 |   "input_c": 3,
  6 |   "input_h": 512,
  7 |   "input_w": 1024,
  8 |   "ENGPath": "/mnt/f/LearningCode/ddrnet/ddrnetwsl.eng",
  9 |   "weightsDir": "/mnt/f/LearningCode/ddrnet/wts/",
 10 |   "int8": false,
 11 |   "fp16": true,
 12 |   "div_255": true,
 13 |   "Mean": [ 0.485, 0.456, 0.406 ],
 14 |   "Std": [ 0.229, 0.224, 0.225 ],
 15 |   "inputBlobName": "data",
 16 |   "outputBlobName": "output",
 17 |   "outputSize": 4194304,
 18 |   "createENG": true,
 19 |   "doInfer": false,
 20 |   "imgDir": "/mnt/f/dataset/oneimg/",
 21 |   "imgType": "png",
 22 |   "network": [
 23 |     {
 24 |       "layerStyle": "cba",
 25 |       "layerName": "relu_16",
 26 |       "inputName": "data",
 27 |       "convFile": "conv1.0",
 28 |       "biasFile":"conv1.0",
 29 |       "parameter": {
 30 |         "input_c": 3,
 31 |         "output_c": 32,
 32 |         "kernel": [ 3, 3 ],
 33 |         "padding": [ 1, 1 ],
 34 |         "stride": [ 2, 2 ]
 35 |       },
 36 |       "withBN": true,
 37 |       "bnFile": "conv1.1",
 38 |       "active_type": "relu",
 39 |       "eps": 1e-5
 40 |     },
 41 |     {
 42 |       "layerStyle": "cba",
 43 |       "layerName": "relu_19",
 44 |       "inputName": "relu_16",
 45 |       "convFile": "conv1.3",
 46 |        "biasFile":"conv1.3",
 47 |       "parameter": {
 48 |         "input_c": 32,
 49 |         "output_c": 32,
 50 |         "kernel": [ 3, 3 ],
 51 |         "padding": [ 1, 1 ],
 52 |         "stride": [ 2, 2 ]
 53 |       },
 54 |       "withBN": true,
 55 |       "bnFile": "conv1.4",
 56 |       "active_type": "relu",
 57 |       "eps": 1e-5
 58 |     },
 59 |     {
 60 |       "layerStyle": "resnet",
 61 |       "layerName": "relu_26",
 62 |       "inputName": "relu_19",
 63 |       "right":[
 64 |                [  ["layer1.0.conv1", 32, 3, 1, 1],
 65 |                   ["layer1.0.bn1"],
 66 |                   ["relu"]
 67 |                ],
 68 |                [
 69 |                 ["layer1.0.conv2", 32, 3, 1, 1],
 70 |                 ["layer1.0.bn2"]
 71 |                ]
 72 |           ],
 73 |       "active_type": "relu"
 74 |     },
 75 |     {       
 76 |       "layerStyle": "resnet",
 77 |       "layerName": "relu_33",
 78 |       "inputName": "relu_26",
 79 |       "right":[
 80 |                [  
 81 |                   ["layer1.1.conv1", 32, 3, 1, 1],
 82 |                   ["layer1.1.bn1"],
 83 |                   ["relu"]
 84 |                ],
 85 |                [
 86 |                   ["layer1.1.conv2", 32, 3, 1, 1],
 87 |                   ["layer1.1.bn2"]
 88 |                ]
 89 |           ],
 90 |       "active_type": "relu"
 91 |     },
 92 |     {
 93 |       "layerStyle": "resnet",
 94 |       "layerName": "relu_42",
 95 |       "inputName": "relu_33",
 96 |       "left":[
 97 |                [
 98 |                   ["layer2.0.conv1", 64, 3, 2, 1],
 99 |                   ["layer2.0.bn1"],
100 |                   ["relu"]
101 |                ],
102 |                [
103 |                   ["layer2.0.conv2", 64, 3, 1, 1],
104 |                   ["layer2.0.bn2"]
105 |                ]
106 |           ],
107 |        "right":[
108 |                    [  
109 |                       ["layer2.0.downsample.0", 64, 1, 2, 0],
110 |                       ["layer2.0.downsample.1"]
111 |                    ]
112 |           ],
113 |       "active_type": "relu"
114 |     },
115 |     {
116 |       "layerStyle": "resnet",
117 |       "layerName": "add_48",
118 |       "inputName": "relu_42",
119 |       "right":[
120 |                [  
121 |                   ["layer2.1.conv1", 64, 3, 1, 1],
122 |                   ["layer2.1.bn1"],
123 |                   ["relu"]
124 |                ],
125 |                [
126 |                   ["layer2.1.conv2", 64, 3, 1, 1],
127 |                   ["layer2.1.bn2"]
128 |                ]
129 |           ]
130 |     },
131 |     {
132 |       "layerStyle": "active",
133 |       "layerName": "relu_65",
134 |       "active_type": "relu",
135 |       "inputName": "add_48"
136 |     },
137 |     {
138 |       "layerStyle": "resnet",
139 |       "layerName": "relu_72",
140 |       "inputName": "relu_65",
141 |       "right":[
142 |                [  
143 |                   ["layer3_.0.conv1", 64, 3, 1, 1],
144 |                   ["layer3_.0.bn1"],
145 |                   ["relu"]
146 |                ],
147 |                [
148 |                   ["layer3_.0.conv2", 64, 3, 1, 1],
149 |                   ["layer3_.0.bn2"]
150 |                ]
151 |           ],
152 |         "active_type": "relu"
153 |     },
154 |     { 
155 |       "layerStyle": "resnet",
156 |       "layerName": "add_78",
157 |       "inputName": "relu_72",
158 |       "right":[
159 |                [  
160 |                   ["layer3_.1.conv1", 64, 3, 1, 1],
161 |                   ["layer3_.1.bn1"],
162 |                   ["relu"]
163 |                ],
164 |                [
165 |                   ["layer3_.1.conv2", 64, 3, 1, 1],
166 |                   ["layer3_.1.bn2"]
167 |                ]
168 |           ]
169 |     },
170 |     {
171 |       "layerStyle": "active",
172 |       "layerName": "relu_79",
173 |       "active_type": "relu",
174 |       "inputName": "add_78"
175 |     },
176 |     {
177 |       "layerStyle": "active",
178 |       "layerName": "relu_49",
179 |       "active_type": "relu",
180 |       "inputName": "add_48"
181 |     },
182 |     {
183 |       "layerStyle": "resnet",
184 |       "layerName": "relu_58",
185 |       "inputName": "relu_49",
186 |       "left":[
187 |                [
188 |                   ["layer3.0.conv1", 128, 3, 2, 1],
189 |                   ["layer3.0.bn1"],
190 |                   ["relu"]
191 |                ],
192 |                [
193 |                   ["layer3.0.conv2", 128, 3, 1, 1],
194 |                   ["layer3.0.bn2"]
195 |                ]
196 |           ],
197 |        "right":[
198 |                    [  
199 |                       ["layer3.0.downsample.0", 128, 1, 2, 0],
200 |                       ["layer3.0.downsample.1"]
201 |                    ]
202 |           ],
203 |       "active_type": "relu"
204 |     },
205 |     {
206 |       "layerStyle": "resnet",
207 |       "layerName": "add_64",
208 |       "inputName": "relu_58",
209 |       "right":[
210 |                [  
211 |                   ["layer3.1.conv1", 128, 3, 1, 1],
212 |                   ["layer3.1.bn1"],
213 |                   ["relu"]
214 |                ],
215 |                [
216 |                   ["layer3.1.conv2", 128, 3, 1, 1],
217 |                   ["layer3.1.bn2"]
218 |                ]
219 |           ]
220 |     },
221 |     {
222 |       "layerStyle": "cba",
223 |       "layerName": "BatchNormalization_81",
224 |       "inputName": "relu_79",
225 |       "convFile": "down3.0",
226 |       "parameter": {
227 |         "input_c": 64,
228 |         "output_c": 128,
229 |         "kernel": [ 3, 3 ],
230 |         "padding": [ 1, 1 ],
231 |         "stride": [ 2, 2 ]
232 |       },
233 |       "withBN": true,
234 |       "bnFile": "down3.1"
235 |     },
236 |     {
237 |       "layerStyle": "eltwise",
238 |       "layerName": "add_82",
239 |       "inputName": [ "add_64", "BatchNormalization_81" ],
240 |       "eltType": "kSUM"
241 |     },
242 |     {
243 |       "layerStyle": "active",
244 |       "layerName": "relu_83",
245 |       "active_type": "relu",
246 |       "inputName": "add_64"
247 |     },
248 |     {            
249 |       "layerStyle": "cba",
250 |       "layerName": "BatchNormalization_85",
251 |       "inputName": "relu_83",
252 |       "convFile": "compression3.0",
253 |       "parameter": {
254 |         "input_c": 128,
255 |         "output_c": 64,
256 |         "kernel": [ 1, 1 ],
257 |         "padding": [ 0, 0 ],
258 |         "stride": [ 1, 1 ]
259 |       },
260 |       "withBN": true,
261 |       "bnFile": "compression3.1"
262 |     },
263 |     { 
264 |       "layerStyle": "upsample",
265 |       "layerName": "Upsample_96",
266 |       "inputName": "BatchNormalization_85",
267 |       "mode": "linear",
268 |       "grid" : [64, 128]
269 |     },
270 |     {
271 |       "layerStyle": "eltwise",
272 |       "layerName": "add_97",
273 |       "inputName": [ "add_78", "Upsample_96" ],
274 |       "eltType": "kSUM"
275 |     },
276 |     {
277 |       "layerStyle": "active",
278 |       "layerName": "relu_114",
279 |       "active_type": "relu",
280 |       "inputName": "add_97"
281 |     },
282 |     {
283 |       "layerStyle": "active",
284 |       "layerName": "relu_98",
285 |       "active_type": "relu",
286 |       "inputName": "add_82"
287 |     },
288 |     {
289 |       "layerStyle": "resnet",
290 |       "layerName": "relu_107",
291 |       "inputName": "relu_98",
292 |       "left":[
293 |                [
294 |                   ["layer4.0.conv1", 256, 3, 2, 1],
295 |                   ["layer4.0.bn1"],
296 |                   ["relu"]
297 |                ],
298 |                [
299 |                   ["layer4.0.conv2", 256, 3, 1, 1],
300 |                   ["layer4.0.bn2"]
301 |                ]
302 |           ],
303 |        "right":[
304 |                    [  
305 |                       ["layer4.0.downsample.0", 256, 1, 2, 0],
306 |                       ["layer4.0.downsample.1"]
307 |                    ]
308 |           ],
309 |       "active_type": "relu"
310 |     },
311 |     {
312 |       "layerStyle": "resnet",
313 |       "layerName": "add_113",
314 |       "inputName": "relu_107",
315 |       "right":[
316 |                [  
317 |                   ["layer4.1.conv1", 256, 3, 1, 1],
318 |                   ["layer4.1.bn1"],
319 |                   ["relu"]
320 |                ],
321 |                [
322 |                   ["layer4.1.conv2", 256, 3, 1, 1],
323 |                   ["layer4.1.bn2"]
324 |                ]
325 |           ]
326 |     },
327 |     {
328 |       "layerStyle": "active",
329 |       "layerName": "relu_135",
330 |       "active_type": "relu",
331 |       "inputName": "add_113"
332 |     },
333 |     {
334 |       "layerStyle": "cba",
335 |       "layerName": "BatchNormalization_137",
336 |       "inputName": "relu_135",
337 |       "convFile": "compression4.0",
338 |       "parameter": {
339 |         "input_c": 256,
340 |         "output_c": 64,
341 |         "kernel": [ 1, 1 ],
342 |         "padding": [ 0, 0 ],
343 |         "stride": [ 1, 1 ]
344 |       },
345 |       "withBN": true,
346 |       "bnFile": "compression4.1"
347 |     },
348 |     {
349 |       "layerStyle": "resnet",
350 |       "layerName": "relu_121",
351 |       "inputName": "relu_114",
352 |       "right":[
353 |                [  
354 |                   ["layer4_.0.conv1", 64, 3, 1, 1],
355 |                   ["layer4_.0.bn1"],
356 |                   ["relu"]
357 |                ],
358 |                [
359 |                   ["layer4_.0.conv2", 64, 3, 1, 1],
360 |                   ["layer4_.0.bn2"]
361 |                ]
362 |           ],
363 |       "active_type": "relu"
364 |     },
365 |     {
366 |       "layerStyle": "resnet",
367 |       "layerName": "add_127",
368 |       "inputName": "relu_121",
369 |       "right":[
370 |                [  
371 |                   ["layer4_.1.conv1", 64, 3, 1, 1],
372 |                   ["layer4_.1.bn1"],
373 |                   ["relu"]
374 |                ],
375 |                [
376 |                   ["layer4_.1.conv2", 64, 3, 1, 1],
377 |                   ["layer4_.1.bn2"]
378 |                ]
379 |           ]
380 |     },
381 |     {
382 |       "layerStyle": "upsample",
383 |       "layerName": "Upsample_148",
384 |       "inputName": "BatchNormalization_137",
385 |       "mode": "linear",
386 |       "grid" : [64, 128]
387 |     },
388 |     {
389 |       "layerStyle": "eltwise",
390 |       "layerName": "add_149",
391 |       "inputName": [ "add_127", "Upsample_148" ],
392 |       "eltType": "kSUM"
393 |     },
394 |     {
395 |       "layerStyle": "active",
396 |       "layerName": "relu_150",
397 |       "active_type": "relu",
398 |       "inputName": "add_149"
399 |     },
400 |     {
401 |       "layerStyle": "resnet",
402 |       "layerName": "add_161",
403 |       "inputName": "relu_150",
404 |       "left":[
405 |                [
406 |                   ["layer5_.0.conv1", 64, 1, 1, 0],
407 |                   ["layer5_.0.bn1"],
408 |                   ["relu"]
409 |                ],
410 |                [
411 |                   ["layer5_.0.conv2", 64, 3, 1, 1],
412 |                   ["layer5_.0.bn2"],
413 |                   ["relu"]
414 |                ],
415 |                [
416 |                   ["layer5_.0.conv3", 128, 1, 1, 0],
417 |                   ["layer5_.0.bn3"]
418 |                ]
419 |           ],
420 |        "right":[
421 |                    [  
422 |                       ["layer5_.0.downsample.0", 128, 1, 1, 0],
423 |                       ["layer5_.0.downsample.1"]
424 |                    ]
425 |           ]
426 |     },
427 |     {
428 |       "layerStyle": "active",
429 |       "layerName": "relu_128",
430 |       "active_type": "relu",
431 |       "inputName": "add_127"
432 |     },
433 |     {
434 |       "layerStyle": "cba",
435 |       "layerName": "relu_131",
436 |       "inputName": "relu_128",
437 |       "convFile": "down4.0",
438 |       "parameter": {
439 |         "input_c": 64,
440 |         "output_c": 128,
441 |         "kernel": [ 3, 3 ],
442 |         "padding": [ 1, 1 ],
443 |         "stride": [ 2, 2 ]
444 |       },
445 |       "withBN": true,
446 |       "bnFile": "down4.1",
447 |       "active_type": "relu",
448 |       "eps": 1e-5
449 |     },
450 |     {      
451 |       "layerStyle": "cba",
452 |       "layerName": "BatchNormalization_133",
453 |       "inputName": "relu_131",
454 |       "convFile": "down4.3",
455 |       "parameter": {
456 |         "input_c": 128,
457 |         "output_c": 256,
458 |         "kernel": [ 3, 3 ],
459 |         "padding": [ 1, 1 ],
460 |         "stride": [ 2, 2 ]
461 |       },
462 |       "withBN": true,
463 |       "bnFile": "down4.4"
464 |     },
465 |     { 
466 |       "layerStyle": "eltwise",
467 |       "layerName": "add_134",
468 |       "inputName": [ "add_113", "BatchNormalization_133" ],
469 |       "eltType": "kSUM"
470 |     },
471 |     {
472 |       "layerStyle": "active",
473 |       "layerName": "relu_162",
474 |       "active_type": "relu",
475 |       "inputName": "add_134"
476 |     },
477 |     {
478 |       "layerStyle": "resnet",
479 |       "layerName": "add_173",
480 |       "inputName": "relu_162",
481 |       "left":[
482 |                [
483 |                   ["layer5.0.conv1", 256, 1, 1, 0],
484 |                   ["layer5.0.bn1"],
485 |                   ["relu"]
486 |                ],
487 |                [
488 |                   ["layer5.0.conv2", 256, 3, 2, 1],
489 |                   ["layer5.0.bn2"],
490 |                   ["relu"]
491 |                ],
492 |                [
493 |                   ["layer5.0.conv3", 512, 1, 1, 0],
494 |                   ["layer5.0.bn3"]
495 |                ]
496 |           ],
497 |        "right":[
498 |                    [  
499 |                       ["layer5.0.downsample.0", 512, 1, 2, 0],
500 |                       ["layer5.0.downsample.1"]
501 |                    ]
502 |           ]
503 |     },
504 |     {
505 |       "layerStyle": "bn",
506 |       "layerName": "BatchNormalization_180",
507 |       "inputName": "add_173",
508 |       "weightFile": "spp.scale0.0"
509 |     },
510 |     {
511 |       "layerStyle": "active",
512 |       "layerName": "relu_181",
513 |       "active_type": "relu",
514 |       "inputName": "BatchNormalization_180"
515 |     },
516 |     {
517 |       "layerStyle": "conv",
518 |       "layerName": "conv_182",
519 |       "inputName": "relu_181",
520 |       "weightFile": "spp.scale0.2",
521 |        "parameter": {
522 |         "input_c": 512,
523 |         "output_c": 128,
524 |         "kernel": [ 1, 1 ],
525 |         "padding": [ 0, 0 ],
526 |         "stride": [ 1, 1 ]
527 |       }
528 |     },
529 |     {
530 |       "layerStyle": "pool",
531 |       "layerName": "AveragePool_184",
532 |       "inputName": "add_173",
533 |       "parameter": {
534 |         "poolType": "kAVG",
535 |         "kernel": [ 5, 5 ],
536 |         "padding": [ 2, 2 ],
537 |         "stride": [ 2, 2 ]
538 |       }
539 |     },
540 |     {
541 |       "layerStyle": "bn",
542 |       "layerName": "BatchNormalization_185",
543 |       "inputName": "AveragePool_184",
544 |       "weightFile": "spp.scale1.1"
545 |     },
546 |     {
547 | 
548 |       "layerStyle": "active",
549 |       "layerName": "relu_186",
550 |       "active_type": "relu",
551 |       "inputName": "BatchNormalization_185"
552 |     },
553 |     {
554 |       "layerStyle": "conv",
555 |       "layerName": "conv_187",
556 |       "inputName": "relu_186",
557 |       "weightFile": "spp.scale1.3",
558 |        "parameter": {
559 |         "input_c": 512,
560 |         "output_c": 128,
561 |         "kernel": [ 1, 1 ],
562 |         "padding": [ 0, 0 ],
563 |         "stride": [ 1, 1 ]
564 |       }
565 |     },
566 |     {
567 |       "layerStyle": "upsample",
568 |       "layerName": "Upsample_198",
569 |       "inputName": "conv_187",
570 |       "mode": "linear",
571 |       "grid" : [8, 16]
572 |     },
573 |     {
574 |       "layerStyle": "eltwise",
575 |       "layerName": "add_199",
576 |       "inputName": [ "conv_182", "Upsample_198" ],
577 |       "eltType": "kSUM"
578 |     },
579 |      {
580 |       "layerStyle": "pool",
581 |       "layerName": "AveragePool_204",
582 |       "inputName": "add_173",
583 |       "parameter": {
584 |         "poolType": "kAVG",
585 |         "kernel": [ 9, 9 ],
586 |         "padding": [ 4, 4 ],
587 |         "stride": [ 4, 4 ]
588 |       }
589 |     },
590 |     {
591 |       "layerStyle": "bn",
592 |       "layerName": "BatchNormalization_205",
593 |       "inputName": "AveragePool_204",
594 |       "weightFile": "spp.scale2.1"
595 |     },
596 |     {
597 |       "layerStyle": "active",
598 |       "layerName": "relu_206",
599 |       "active_type": "relu",
600 |       "inputName": "BatchNormalization_205"
601 |     },
602 |     {
603 |       "layerStyle": "conv",
604 |       "layerName": "conv_207",
605 |       "inputName": "relu_206",
606 |       "weightFile": "spp.scale2.3",
607 |        "parameter": {
608 |         "input_c": 512,
609 |         "output_c": 128,
610 |         "kernel": [ 1, 1 ],
611 |         "padding": [ 0, 0 ],
612 |         "stride": [ 1, 1 ]
613 |       }
614 |     },
615 |     {
616 |       "layerStyle": "upsample",
617 |       "layerName": "Upsample_218",
618 |       "inputName": "conv_207",
619 |       "mode": "linear",
620 |       "grid" : [8, 16]
621 |     },
622 |     {
623 |       "layerStyle": "pool",
624 |       "layerName": "AveragePool_224",
625 |       "inputName": "add_173",
626 |       "parameter": {
627 |         "poolType": "kAVG",
628 |         "kernel": [ 17, 17],
629 |         "padding": [ 8, 8 ],
630 |         "stride": [ 8, 8 ]
631 |       }
632 |     },
633 |     {
634 | 
635 |       "layerStyle": "bn",
636 |       "layerName": "BatchNormalization_225",
637 |       "inputName": "AveragePool_224",
638 |       "weightFile": "spp.scale3.1"
639 |     },
640 |     {
641 |       "layerStyle": "active",
642 |       "layerName": "relu_226",
643 |       "active_type": "relu",
644 |       "inputName": "BatchNormalization_225"
645 |     },
646 |     {
647 |       "layerStyle": "conv",
648 |       "layerName": "conv_227",
649 |       "inputName": "relu_226",
650 |       "weightFile": "spp.scale3.3",
651 |        "parameter": {
652 |         "input_c": 512,
653 |         "output_c": 128,
654 |         "kernel": [ 1, 1 ],
655 |         "padding": [ 0, 0 ],
656 |         "stride": [ 1, 1 ]
657 |       }
658 |     },
659 |     {
660 |       "layerStyle": "upsample",
661 |       "layerName": "Upsample_238",
662 |       "inputName": "conv_227",
663 |       "mode": "linear",
664 |       "grid" : [8, 16]
665 |     },
666 |     {
667 |       "layerStyle": "reduce",
668 |       "layerName": "GlobalAveragePool_243",
669 |       "inputName": "add_173",
670 |       "axes": 6,
671 |       "keepD": true
672 |     },
673 |     {
674 |       "layerStyle": "bn",
675 |       "layerName": "BatchNormalization_244",
676 |       "inputName": "GlobalAveragePool_243",
677 |       "weightFile": "spp.scale4.1"
678 |     },
679 |     {
680 |       "layerStyle": "active",
681 |       "layerName": "relu_245",
682 |       "active_type": "relu",
683 |       "inputName": "BatchNormalization_244"
684 |     },
685 |     {
686 |       "layerStyle": "conv",
687 |       "layerName": "conv_246",
688 |       "inputName": "relu_245",
689 |       "weightFile": "spp.scale4.3",
690 |        "parameter": {
691 |         "input_c": 512,
692 |         "output_c": 128,
693 |         "kernel": [ 1, 1 ],
694 |         "padding": [ 0, 0 ],
695 |         "stride": [ 1, 1 ]
696 |       }
697 |     },
698 |     {
699 |       "layerStyle": "upsample",
700 |       "layerName": "Upsample_257",
701 |       "inputName": "conv_246",
702 |       "mode": "linear",
703 |       "grid" : [8, 16]
704 |     },
705 |     {
706 |       "layerStyle": "bn",
707 |       "layerName": "BatchNormalization_200",
708 |       "inputName": "add_199",
709 |       "weightFile": "spp.process1.0"
710 |     },
711 |     {
712 |       "layerStyle": "active",
713 |       "layerName": "relu_201",
714 |       "active_type": "relu",
715 |       "inputName": "BatchNormalization_200"
716 |     },
717 |     {
718 |       "layerStyle": "conv",
719 |       "layerName": "conv_202",
720 |       "inputName": "relu_201",
721 |       "weightFile": "spp.process1.2",
722 |        "parameter": {
723 |         "input_c": 128,
724 |         "output_c": 128,
725 |         "kernel": [ 3, 3 ],
726 |         "padding": [ 1, 1 ],
727 |         "stride": [ 1, 1 ]
728 |       }
729 |     },
730 |     {
731 |       "layerStyle": "eltwise",
732 |       "layerName": "add_219",
733 |       "inputName": [ "conv_202", "Upsample_218" ],
734 |       "eltType": "kSUM"
735 |     },
736 |     {
737 |       "layerStyle": "bn",
738 |       "layerName": "BatchNormalization_220",
739 |       "inputName": "add_219",
740 |       "weightFile": "spp.process2.0"
741 |     },
742 |     {
743 |       "layerStyle": "active",
744 |       "layerName": "relu_221",
745 |       "active_type": "relu",
746 |       "inputName": "BatchNormalization_220"
747 |     },
748 |     {
749 |       "layerStyle": "conv",
750 |       "layerName": "conv_222",
751 |       "inputName": "relu_221",
752 |       "weightFile": "spp.process2.2",
753 |        "parameter": {
754 |         "input_c": 128,
755 |         "output_c": 128,
756 |         "kernel": [ 3, 3 ],
757 |         "padding": [ 1, 1 ],
758 |         "stride": [ 1, 1 ]
759 |       }
760 |     },
761 |     {
762 |       "layerStyle": "eltwise",
763 |       "layerName": "add_239",
764 |       "inputName": [ "conv_222", "Upsample_238" ],
765 |       "eltType": "kSUM"
766 |     },
767 |      {
768 |       "layerStyle": "bn",
769 |       "layerName": "BatchNormalization_240",
770 |       "inputName": "add_239",
771 |       "weightFile": "spp.process3.0"
772 |     },
773 |     {
774 |       "layerStyle": "active",
775 |       "layerName": "relu_241",
776 |       "active_type": "relu",
777 |       "inputName": "BatchNormalization_240"
778 |     },
779 |     {
780 |       "layerStyle": "conv",
781 |       "layerName": "conv_242",
782 |       "inputName": "relu_241",
783 |       "weightFile": "spp.process3.2",
784 |        "parameter": {
785 |         "input_c": 128,
786 |         "output_c": 128,
787 |         "kernel": [ 3, 3 ],
788 |         "padding": [ 1, 1 ],
789 |         "stride": [ 1, 1 ]
790 |       }
791 |     },
792 |     {
793 |       "layerStyle": "eltwise",
794 |       "layerName": "add_258",
795 |       "inputName": [ "conv_242", "Upsample_257" ],
796 |       "eltType": "kSUM"
797 |     },
798 |     {
799 |       "layerStyle": "bn",
800 |       "layerName": "BatchNormalization_259",
801 |       "inputName": "add_258",
802 |       "weightFile": "spp.process4.0"
803 |     },
804 |     {
805 |       "layerStyle": "active",
806 |       "layerName": "relu_260",
807 |       "active_type": "relu",
808 |       "inputName": "BatchNormalization_259"
809 |     },
810 |     {
811 |       "layerStyle": "conv",
812 |       "layerName": "conv_261",
813 |       "inputName": "relu_260",
814 |       "weightFile": "spp.process4.2",
815 |        "parameter": {
816 |         "input_c": 128,
817 |         "output_c": 128,
818 |         "kernel": [ 3, 3 ],
819 |         "padding": [ 1, 1 ],
820 |         "stride": [ 1, 1 ]
821 |       }
822 |     },
823 |     {
824 |       "layerStyle": "concat",
825 |       "layerName": "Concat_262",
826 |       "axis": 0,
827 |       "inputName": [ "conv_182", "conv_202", "conv_222", "conv_242", "conv_261" ]
828 |     },
829 |     {
830 |       
831 |       "layerStyle": "bn",
832 |       "layerName": "BatchNormalization_263",
833 |       "inputName": "Concat_262",
834 |       "weightFile": "spp.compression.0"
835 |     },
836 |     {
837 |       "layerStyle": "active",
838 |       "layerName": "relu_264",
839 |       "active_type": "relu",
840 |       "inputName": "BatchNormalization_263"
841 |     },
842 |     {
843 |       "layerStyle": "conv",
844 |       "layerName": "conv_265",
845 |       "inputName": "relu_264",
846 |       "weightFile": "spp.compression.2",
847 |        "parameter": {
848 |         "input_c": 640,
849 |         "output_c": 128,
850 |         "kernel": [ 1, 1 ],
851 |         "padding": [ 0, 0 ],
852 |         "stride": [ 1, 1 ]
853 |       }
854 |     },
855 |     {
856 |       "layerStyle": "bn",
857 |       "layerName": "BatchNormalization_266",
858 |       "inputName": "add_173",
859 |       "weightFile": "spp.shortcut.0"
860 |     },
861 |     {
862 |       "layerStyle": "active",
863 |       "layerName": "relu_267",
864 |       "active_type": "relu",
865 |       "inputName": "BatchNormalization_266"
866 |     },
867 |     {
868 |       "layerStyle": "conv",
869 |       "layerName": "conv_268",
870 |       "inputName": "relu_267",
871 |       "weightFile": "spp.shortcut.2",
872 |        "parameter": {
873 |         "input_c": 512,
874 |         "output_c": 128,
875 |         "kernel": [ 1, 1 ],
876 |         "padding": [ 0, 0 ],
877 |         "stride": [ 1, 1 ]
878 |       }
879 |     },
880 |     {
881 |       "layerStyle": "eltwise",
882 |       "layerName": "add_269",
883 |       "inputName": [ "conv_265", "conv_268" ],
884 |       "eltType": "kSUM"
885 |     },
886 |     {
887 |       "layerStyle": "upsample",
888 |       "layerName": "Upsample_280",
889 |       "inputName": "add_269",
890 |       "mode": "linear",
891 |       "grid" : [64, 128]
892 |     },
893 |     {
894 |       "layerStyle": "eltwise",
895 |       "layerName": "add_281",
896 |       "inputName": [ "add_161", "Upsample_280" ],
897 |       "eltType": "kSUM"
898 |     },
899 |     {
900 |       "layerStyle": "bn",
901 |       "layerName": "BatchNormalization_282",
902 |       "inputName": "add_281",
903 |       "weightFile": "final_layer.bn1"
904 |     },
905 |     {
906 |       "layerStyle": "active",
907 |       "layerName": "relu_283",
908 |       "active_type": "relu",
909 |       "inputName": "BatchNormalization_282"
910 |     },
911 |     {
912 |       "layerStyle": "conv",
913 |       "layerName": "conv_284",
914 |       "inputName": "relu_283",
915 |       "weightFile": "final_layer.conv1",
916 |        "parameter": {
917 |         "input_c": 128,
918 |         "output_c": 64,
919 |         "kernel": [ 3, 3 ],
920 |         "padding": [ 1, 1 ],
921 |         "stride": [ 1, 1 ]
922 |       }
923 |     },
924 |     {
925 |       "layerStyle": "bn",
926 |       "layerName": "BatchNormalization_285",
927 |       "inputName": "conv_284",
928 |       "weightFile": "final_layer.bn2"
929 |     },
930 |     {
931 |       "layerStyle": "active",
932 |       "layerName": "relu_286",
933 |       "active_type": "relu",
934 |       "inputName": "BatchNormalization_285"
935 |     },
936 |     {
937 |       "layerStyle": "conv",
938 |       "layerName": "conv_287",
939 |       "inputName": "relu_286",
940 |       "weightFile": "final_layer.conv2",
941 |        "parameter": {
942 |         "input_c": 64,
943 |         "output_c": 19,
944 |         "kernel": [ 1, 1 ],
945 |         "padding": [ 0, 0 ],
946 |         "stride": [ 1, 1 ]
947 |       }
948 |     },
949 |     {
950 |       "layerStyle": "upsample",
951 |       "layerName": "Upsample_final",
952 |       "inputName": "conv_287",
953 |       "mode": "linear",
954 |       "grid" : [512, 1024]
955 |     },
956 |     {
957 |                 "outputName": "output", 
958 |       "layerStyle": "topk",
959 |       "layerName": "top_max",
960 |       "inputName": "Upsample_final",
961 |       "TopKOperation": "kMAX",
962 |       "k": 1,
963 |       "reduceAxes": 1,
964 |       "outputIndex": 1
965 |     }
966 |   ]
967 | 
968 | }
969 | 


--------------------------------------------------------------------------------
/model/deeplabV3_res50.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "input_c": 3,
  3 |   "input_h": 640,
  4 |   "input_w": 640,
  5 |   "ENGPath": "D:/deeplabV3/yunsheng/20210422/mv.pg",
  6 |   "weightsDir": "D:/deeplabV3/yunsheng/20210422/wts/",
  7 |   "fp16": true,
  8 |   "int8": false,
  9 |   "div_255": true,
 10 |   "Mean": [ 0.485, 0.456, 0.406 ],
 11 |   "Std": [ 0.229, 0.224, 0.225 ],
 12 |   "inputBlobName": "data",
 13 |   "outputBlobName": "output",
 14 |   "maxBatchsize": 10,
 15 |   "outputSize": 409600,
 16 |   "network": [
 17 |     //{
 18 |     //  "layerStyle": "M_S",
 19 |     //  "layerName": "preInput",
 20 |     //  "inputName": "data",
 21 |     //  "div255": true,
 22 |     //  "Mean": [ 0.485, 0.456, 0.406 ],
 23 |     //  "Std": [ 0.229, 0.224, 0.225 ]
 24 |     //},
 25 |     {
 26 |       "layerStyle": "cba",
 27 |       "layerName": "conv_0",
 28 |       "inputName": "data",
 29 |       "convFile": "0.0.conv1",
 30 |       "parameter": {
 31 |         "input_c": 3,
 32 |         "output_c": 64,
 33 |         "kernel": [ 7, 7 ],
 34 |         "padding": [ 3, 3 ],
 35 |         "stride": [ 2, 2 ]
 36 |       },
 37 |       "bnFile": "0.0.bn1",
 38 |       "active_type": "relu"
 39 |     },
 40 |     {
 41 |       "layerStyle": "pool",
 42 |       "layerName": "maxpool_3",
 43 |       "inputName": "conv_0",
 44 |       "parameter": {
 45 |         "poolType": "kMAX",
 46 |         "kernel": [ 3, 3 ],
 47 |         "padding": [ 1, 1 ],
 48 |         "stride": [ 2, 2 ]
 49 |       }
 50 |     },
 51 |     {
 52 |       "layerStyle": "cba",
 53 |       "layerName": "conv_4",
 54 |       "inputName": "maxpool_3",
 55 |       "convFile": "0.0.layer1.0.conv1",
 56 |       "parameter": {
 57 |         "input_c": 64,
 58 |         "output_c": 64,
 59 |         "kernel": [ 1, 1 ],
 60 |         "padding": [ 0, 0 ],
 61 |         "stride": [ 1, 1 ]
 62 |       },
 63 |       "bnFile": "0.0.layer1.0.bn1",
 64 |       "active_type": "relu"
 65 |     },
 66 |     {
 67 |       "layerStyle": "cba",
 68 |       "layerName": "conv_7",
 69 |       "inputName": "conv_4",
 70 |       "convFile": "0.0.layer1.0.conv2",
 71 |       "parameter": {
 72 |         "input_c": 64,
 73 |         "output_c": 64,
 74 |         "kernel": [ 3, 3 ],
 75 |         "padding": [ 1, 1 ],
 76 |         "stride": [ 1, 1 ]
 77 |       },
 78 |       "bnFile": "0.0.layer1.0.bn2",
 79 |       "active_type": "relu"
 80 |     },
 81 |     {
 82 |       "layerStyle": "cba",
 83 |       "layerName": "conv_10",
 84 |       "inputName": "conv_7",
 85 |       "convFile": "0.0.layer1.0.conv3",
 86 |       "parameter": {
 87 |         "input_c": 64,
 88 |         "output_c": 256,
 89 |         "kernel": [ 1, 1 ],
 90 |         "padding": [ 0, 0 ],
 91 |         "stride": [ 1, 1 ]
 92 |       },
 93 |       "bnFile": "0.0.layer1.0.bn3"
 94 |     },
 95 |     {
 96 |       "layerStyle": "cba",
 97 |       "layerName": "conv_12",
 98 |       "inputName": "maxpool_3",
 99 |       "convFile": "0.0.layer1.0.downsample.0",
100 |       "parameter": {
101 |         "input_c": 64,
102 |         "output_c": 256,
103 |         "kernel": [ 1, 1 ],
104 |         "padding": [ 0, 0 ],
105 |         "stride": [ 1, 1 ]
106 |       },
107 |       "bnFile": "0.0.layer1.0.downsample.1"
108 |     },
109 |     {
110 |       "layerStyle": "eltwise",
111 |       "layerName": "Add_14",
112 |       "inputName": [ "conv_10", "conv_12" ],
113 |       "eltType": "kSUM"
114 |     },
115 |     {
116 |       "layerStyle": "active",
117 |       "layerName": "relu_15",
118 |       "active_type": "relu",
119 |       "inputName": "Add_14"
120 |     },
121 |     {
122 |       "layerStyle": "resnet3",
123 |       "layerName": "resnet1_1",
124 |       "inputName": "relu_15",
125 |       "downsample": false,
126 |       "weightsFile": "0.0.layer1.1",
127 |       "parameter": {
128 |         "input_c": 256,
129 |         "temp_c": 64,
130 |         "output_c": 256
131 |       }
132 |     },
133 |     {
134 |       "layerStyle": "resnet3",
135 |       "layerName": "resnet1_2",
136 |       "inputName": "resnet1_1",
137 |       "downsample": false,
138 | 
139 |       "weightsFile": "0.0.layer1.2",
140 |       "parameter": {
141 |         "input_c": 256,
142 |         "temp_c": 64,
143 |         "output_c": 256
144 |       }
145 |     }, //resnet1_end
146 |     {
147 |       "layerStyle": "resnet3",
148 |       "layerName": "resnet2_0",
149 |       "inputName": "resnet1_2",
150 |       "downsample": true,
151 |       "weightsFile": "0.0.layer2.0",
152 |       "parameter": {
153 |         "input_c": 256,
154 |         "temp_c": 128,
155 |         "output_c": 512
156 |       }
157 |     },
158 |     {
159 |       "layerStyle": "resnet3",
160 |       "layerName": "resnet2_1",
161 |       "inputName": "resnet2_0",
162 |       "downsample": false,
163 |       "weightsFile": "0.0.layer2.1",
164 |       "parameter": {
165 |         "input_c": 512,
166 |         "temp_c": 128,
167 |         "output_c": 512
168 |       }
169 |     },
170 |     {
171 |       "layerStyle": "resnet3",
172 |       "layerName": "resnet2_2",
173 |       "inputName": "resnet2_1",
174 |       "downsample": false,
175 |       "weightsFile": "0.0.layer2.2",
176 |       "parameter": {
177 |         "input_c": 512,
178 |         "temp_c": 128,
179 |         "output_c": 512
180 |       }
181 |     },
182 |     {
183 |       "layerStyle": "resnet3",
184 |       "layerName": "resnet2_3",
185 |       "inputName": "resnet2_2",
186 |       "downsample": false,
187 | 
188 |       "weightsFile": "0.0.layer2.3",
189 |       "parameter": {
190 |         "input_c": 512,
191 |         "temp_c": 128,
192 |         "output_c": 512
193 |       }
194 |     }, //resnet2_end
195 |     {
196 |       "layerStyle": "resnet3",
197 |       "layerName": "resnet3_0",
198 |       "inputName": "resnet2_3",
199 |       "downsample": true,
200 |       "weightsFile": "0.0.layer3.0",
201 |       "parameter": {
202 |         "input_c": 512,
203 |         "temp_c": 256,
204 |         "output_c": 1024
205 |       }
206 |     },
207 |     {
208 |       "layerStyle": "resnet3",
209 |       "layerName": "resnet3_1",
210 |       "inputName": "resnet3_0",
211 |       "downsample": false,
212 |       "weightsFile": "0.0.layer3.1",
213 |       "parameter": {
214 |         "input_c": 1024,
215 |         "temp_c": 256,
216 |         "output_c": 1024
217 |       }
218 |     },
219 |     {
220 |       "layerStyle": "resnet3",
221 |       "layerName": "resnet3_2",
222 |       "inputName": "resnet3_1",
223 |       "downsample": false,
224 |       "weightsFile": "0.0.layer3.2",
225 |       "parameter": {
226 |         "input_c": 1024,
227 |         "temp_c": 256,
228 |         "output_c": 1024
229 |       }
230 |     },
231 |     {
232 |       "layerStyle": "resnet3",
233 |       "layerName": "resnet3_3",
234 |       "inputName": "resnet3_2",
235 |       "downsample": false,
236 |       "weightsFile": "0.0.layer3.3",
237 |       "parameter": {
238 |         "input_c": 1024,
239 |         "temp_c": 256,
240 |         "output_c": 1024
241 |       }
242 |     },
243 |     {
244 |       "layerStyle": "resnet3",
245 |       "layerName": "resnet3_4",
246 |       "inputName": "resnet3_3",
247 |       "downsample": false,
248 |       "weightsFile": "0.0.layer3.4",
249 |       "parameter": {
250 |         "input_c": 1024,
251 |         "temp_c": 256,
252 |         "output_c": 1024
253 |       }
254 |     },
255 |     {
256 |       "layerStyle": "resnet3",
257 |       "layerName": "resnet3_5",
258 |       "inputName": "resnet3_4",
259 |       "downsample": false,
260 | 
261 |       "weightsFile": "0.0.layer3.5",
262 |       "parameter": {
263 |         "input_c": 1024,
264 |         "temp_c": 256,
265 |         "output_c": 1024
266 |       }
267 |     }, //resnet3_end
268 |     {
269 |       "layerStyle": "resnet",
270 |       "layerName": "resnet4_0",
271 |       "inputName": "resnet3_5",
272 |       "left": [
273 |         [
274 |           [ "0.0.layer4.0.conv1", 512, 1, 1, 0, 2 ],
275 |           [ "0.0.layer4.0.bn1", 1e-5 ],
276 |           [ "relu" ]
277 |         ],
278 |         [
279 |           [ "0.0.layer4.0.conv2", 512, 3, 1, 2, 2 ],
280 |           [ "0.0.layer4.0.bn2", 1e-5 ],
281 |           [ "relu" ]
282 |         ],
283 |         [
284 |           [ "0.0.layer4.0.conv3", 2048, 1, 1, 0, 2 ],
285 |           [ "0.0.layer4.0.bn3", 1e-5 ],
286 |           [ "relu" ]
287 |         ]
288 |       ],
289 |       "right": [
290 |         [
291 |           [ "0.0.layer4.0.downsample.0", 2048, 1, 1, 0, 2 ],
292 |           [ "0.0.layer4.0.downsample.1", 1e-5 ],
293 |           [ "relu" ]
294 |         ]
295 |       ],
296 |       "active_type": "relu"
297 |     },
298 |     {
299 |       "layerStyle": "resnet",
300 |       "layerName": "resnet4_1",
301 |       "inputName": "resnet4_0",
302 |       "left": [
303 |         [
304 |           [ "0.0.layer4.1.conv1", 512, 1, 1, 0 ],
305 |           [ "0.0.layer4.1.bn1", 1e-5 ],
306 |           [ "relu" ]
307 |         ],
308 |         [
309 |           [ "0.0.layer4.1.conv2", 512, 3, 1, 4, 4 ],
310 |           [ "0.0.layer4.1.bn2", 1e-5 ],
311 |           [ "relu" ]
312 |         ],
313 |         [
314 |           [ "0.0.layer4.1.conv3", 2048, 1, 1, 0 ],
315 |           [ "0.0.layer4.1.bn3", 1e-5 ],
316 |           [ "relu" ]
317 |         ]
318 |       ],
319 |       "active_type": "relu"
320 |     },
321 |     {
322 |       "layerStyle": "resnet",
323 |       "layerName": "resnet4_2",
324 |       "inputName": "resnet4_1",
325 |       "left": [
326 |         [
327 |           [ "0.0.layer4.2.conv1", 512, 1, 1, 0 ],
328 |           [ "0.0.layer4.2.bn1", 1e-5 ],
329 |           [ "relu" ]
330 |         ],
331 |         [
332 |           [ "0.0.layer4.2.conv2", 512, 3, 1, 8, 8 ],
333 |           [ "0.0.layer4.2.bn2", 1e-5 ],
334 |           [ "relu" ]
335 |         ],
336 |         [
337 |           [ "0.0.layer4.2.conv3", 2048, 1, 1, 0 ],
338 |           [ "0.0.layer4.2.bn3", 1e-5 ],
339 |           [ "relu" ]
340 |         ]
341 |       ],
342 |       "active_type": "relu"
343 |     }, //resnet4_end
344 |     {
345 |       "layerStyle": "cba",
346 |       "layerName": "aspp0",
347 |       "inputName": "resnet4_2",
348 | 
349 |       "convFile": "0.1.convs.0.0",
350 |       "parameter": {
351 |         "input_c": 2048,
352 |         "output_c": 256,
353 |         "kernel": [ 1, 1 ],
354 |         "padding": [ 0, 0 ],
355 |         "stride": [ 1, 1 ]
356 |       },
357 |       "bnFile": "0.1.convs.0.1",
358 |       "active_type": "relu"
359 |     },
360 |     {
361 |       "layerStyle": "cba",
362 |       "layerName": "aspp1",
363 | 
364 |       "inputName": "resnet4_2",
365 |       "convFile": "0.1.convs.1.0",
366 |       "parameter": {
367 |         "input_c": 2048,
368 |         "output_c": 256,
369 |         "kernel": [ 3, 3 ],
370 |         "padding": [ 6, 6 ],
371 |         "stride": [ 1, 1 ],
372 |         "dilations": [ 6, 6 ]
373 |       },
374 |       "bnFile": "0.1.convs.1.1",
375 |       "active_type": "relu"
376 |     },
377 |     {
378 |       "layerStyle": "cba",
379 |       "layerName": "aspp2",
380 | 
381 |       "inputName": "resnet4_2",
382 |       "convFile": "0.1.convs.2.0",
383 |       "parameter": {
384 |         "input_c": 2048,
385 |         "output_c": 256,
386 |         "kernel": [ 3, 3 ],
387 |         "padding": [ 12, 12 ],
388 |         "stride": [ 1, 1 ],
389 |         "dilations": [ 12, 12 ]
390 |       },
391 |       "bnFile": "0.1.convs.2.1",
392 |       "active_type": "relu"
393 |     },
394 |     {
395 |       "layerStyle": "cba",
396 |       "layerName": "aspp3",
397 | 
398 |       "inputName": "resnet4_2",
399 |       "convFile": "0.1.convs.3.0",
400 |       "parameter": {
401 |         "input_c": 2048,
402 |         "output_c": 256,
403 |         "kernel": [ 3, 3 ],
404 |         "padding": [ 18, 18 ],
405 |         "stride": [ 1, 1 ],
406 |         "dilations": [ 18, 18 ]
407 |       },
408 |       "bnFile": "0.1.convs.3.1",
409 |       "active_type": "relu"
410 |     },
411 |     {
412 |       "layerStyle": "reduce",
413 |       "layerName": "avgPool",
414 |       "inputName": "resnet4_2",
415 |       "keepD": true,
416 |       "axes": 6
417 |     },
418 |     {
419 |       "layerStyle": "cba",
420 |       "layerName": "aspp4",
421 |       "inputName": "avgPool",
422 |       "convFile": "0.1.convs.4.1",
423 |       "parameter": {
424 |         "input_c": 2048,
425 |         "output_c": 256,
426 |         "kernel": [ 1, 1 ],
427 |         "padding": [ 0, 0 ],
428 |         "stride": [ 1, 1 ]
429 |       },
430 |       "bnFile": "0.1.convs.4.2",
431 |       "active_type": "relu"
432 |     },
433 |     {
434 |       "layerStyle": "upsample",
435 |       "layerName": "aspp4_up",
436 |       "inputName": "aspp4",
437 |       "mode": "bilinear",
438 |       "grid": [ 40 ]
439 |     },
440 |     {
441 |       "layerStyle": "concat",
442 |       "layerName": "aspp_cat",
443 |       "axis": 0,
444 |       "inputName": [ "aspp0", "aspp1", "aspp2", "aspp3", "aspp4_up" ]
445 |     },
446 |     {
447 |       "layerStyle": "cba",
448 |       "layerName": "cls_aspp",
449 |       "inputName": "aspp_cat",
450 | 
451 |       "convFile": "0.1.project.0",
452 |       "parameter": {
453 |         "input_c": 1280,
454 |         "output_c": 256,
455 |         "kernel": [ 1, 1 ],
456 |         "padding": [ 0, 0 ],
457 |         "stride": [ 1, 1 ]
458 |       },
459 |       "bnFile": "0.1.project.1",
460 |       "active_type": "relu"
461 |     },
462 |     {
463 |       "layerStyle": "upsample",
464 |       "layerName": "cls_aspp_up",
465 | 
466 |       "inputName": "cls_aspp",
467 |       "mode": "bilinear",
468 |       "grid": [ 160 ]
469 |     },
470 |     {
471 |       "layerStyle": "cba",
472 |       "layerName": "res_aspp",
473 | 
474 |       "inputName": "resnet1_2",
475 |       "convFile": "1.neck.0.lateral_block.conv",
476 |       "parameter": {
477 |         "input_c": 256,
478 |         "output_c": 48,
479 |         "kernel": [ 1, 1 ],
480 |         "padding": [ 0, 0 ],
481 |         "stride": [ 1, 1 ]
482 |       },
483 |       "bnFile": "1.neck.0.lateral_block.bn",
484 |       "active_type": "relu"
485 |     },
486 |     {
487 |       "layerStyle": "concat",
488 |       "layerName": "project_cat",
489 | 
490 |       "axis": 0,
491 |       "inputName": [ "cls_aspp_up", "res_aspp" ]
492 |     },
493 |     {
494 |       "layerStyle": "cba",
495 |       "layerName": "class_1",
496 |       "inputName": "project_cat",
497 | 
498 |       "convFile": "2.block.0.block.0.conv",
499 |       "parameter": {
500 |         "input_c": 304,
501 |         "output_c": 256,
502 |         "kernel": [ 3, 3 ],
503 |         "padding": [ 1, 1 ],
504 |         "stride": [ 1, 1 ]
505 |       },
506 |       "bnFile": "2.block.0.block.0.bn",
507 |       "active_type": "relu"
508 |     },
509 |     {
510 |       "layerStyle": "cba",
511 |       "layerName": "class_2",
512 |       "inputName": "class_1",
513 | 
514 |       "convFile": "2.block.0.block.1.conv",
515 |       "parameter": {
516 |         "input_c": 256,
517 |         "output_c": 256,
518 |         "kernel": [ 3, 3 ],
519 |         "padding": [ 1, 1 ],
520 |         "stride": [ 1, 1 ]
521 |       },
522 |       "bnFile": "2.block.0.block.1.bn",
523 |       "active_type": "relu"
524 |     },
525 |     {
526 |       "layerStyle": "conv",
527 |       "layerName": "class",
528 |       "inputName": "class_2",
529 |       "weightFile": "2.block.1",
530 |       "biasFile": "2.block.1",
531 | 
532 |       "parameter": {
533 |         "input_c": 256,
534 |         "output_c": 15,
535 |         "kernel": [ 1, 1 ],
536 |         "padding": [ 0, 0 ],
537 |         "stride": [ 1, 1 ]
538 |       }
539 |     },
540 |     {
541 |       "layerStyle": "upsample",
542 |       "layerName": "cls_up",
543 |       "inputName": "class",
544 |       "mode": "linear",
545 | 
546 |       "grid": [ 640 ]
547 |     },
548 |     {
549 |       "layerStyle": "softmax",
550 |       "layerName": "soft_max",
551 |       "inputName": "cls_up",
552 | 
553 |       "axes": 1
554 |     },
555 |     {
556 |       "layerStyle": "topk",
557 |       "layerName": "top_max",
558 |       "inputName": "soft_max",
559 |       "outputName": "output",
560 |       "TopKOperation": "kMAX",
561 |       "k": 1,
562 |       "reduceAxes": 1,
563 |       "outputIndex": 1
564 |     }
565 |   ]
566 | }
567 | 


--------------------------------------------------------------------------------
/model/resnet50.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "input_c": 3,
  3 |   "input_h": 256,
  4 |   "input_w": 256,
  5 |   "ENGPath": "H:/myGitHub/tensorrtF/model/resnet50/res50.eng",
  6 |   "weightsDir": "H:/myGitHub/tensorrtF/model/resnet50/wts/",
  7 |   "onnxPath": "H:/myGitHub/tensorrtF/model/resnet50/res50.onnx",
  8 |   "fp16": true,
  9 |   //"int8": true,
 10 |   //"cali_txt": "H:/myGitHub/tensorrtF/model/resnet50/cali.txt",
 11 |   //"cali_table": "H:/myGitHub/tensorrtF/model/resnet50/cali.table",
 12 |   //"div_255": true,
 13 |   //"Mean": [ 0.485, 0.456, 0.406 ],
 14 |   //"Std": [ 0.229, 0.224, 0.225 ],
 15 |   "inputBlobName": "data",
 16 |   "outputBlobName": "output",
 17 |   "maxBatchsize": 10,
 18 |   "outputSize": 1000,
 19 |   "network": [
 20 |     {
 21 |       "layerStyle": "cba",
 22 |       "layerName": "conv_0",
 23 |       "inputName": "data",
 24 |       "convFile": "conv1",
 25 |       "parameter": {
 26 |         "output_c": 64,
 27 |         "kernel": [ 7 ],
 28 |         "padding": [ 3 ],
 29 |         "stride": [ 2 ]
 30 |       },
 31 |       "bnFile": "bn1",
 32 |       "active_type": "relu"
 33 |     },
 34 |     {
 35 |       "layerStyle": "pool",
 36 |       "layerName": "maxpool_3",
 37 |       "inputName": "conv_0",
 38 |       "parameter": {
 39 |         "poolType": "kMAX",
 40 |         "kernel": [ 3, 3 ],
 41 |         "padding": [ 1, 1 ],
 42 |         "stride": [ 2, 2 ]
 43 |       }
 44 |     },
 45 |     {
 46 |       "layerStyle": "cba",
 47 |       "layerName": "conv_4",
 48 |       "inputName": "maxpool_3",
 49 |       "convFile": "layer1.0.conv1",
 50 |       "parameter": {
 51 |         "output_c": 64,
 52 |         "kernel": [ 1, 1 ],
 53 |         "padding": [ 0, 0 ],
 54 |         "stride": [ 1, 1 ]
 55 |       },
 56 |       "bnFile": "layer1.0.bn1",
 57 |       "active_type": "relu"
 58 |     },
 59 |     {
 60 |       "layerStyle": "cba",
 61 |       "layerName": "conv_7",
 62 |       "inputName": "conv_4",
 63 |       "convFile": "layer1.0.conv2",
 64 |       "parameter": {
 65 |         "output_c": 64,
 66 |         "kernel": [ 3, 3 ],
 67 |         "padding": [ 1, 1 ],
 68 |         "stride": [ 1, 1 ]
 69 |       },
 70 |       "bnFile": "layer1.0.bn2",
 71 |       "active_type": "relu"
 72 |     },
 73 |     {
 74 |       "layerStyle": "cba",
 75 |       "layerName": "conv_10",
 76 |       "inputName": "conv_7",
 77 |       "convFile": "layer1.0.conv3",
 78 |       "parameter": {
 79 |         "input_c": 64,
 80 |         "output_c": 256,
 81 |         "kernel": [ 1, 1 ],
 82 |         "padding": [ 0, 0 ],
 83 |         "stride": [ 1, 1 ]
 84 |       },
 85 |       "bnFile": "layer1.0.bn3"
 86 |     },
 87 |     {
 88 |       "layerStyle": "cba",
 89 |       "layerName": "conv_12",
 90 |       "inputName": "maxpool_3",
 91 |       "convFile": "layer1.0.downsample.0",
 92 |       "parameter": {
 93 |         "input_c": 64,
 94 |         "output_c": 256,
 95 |         "kernel": [ 1, 1 ],
 96 |         "padding": [ 0, 0 ],
 97 |         "stride": [ 1, 1 ]
 98 |       },
 99 |       "bnFile": "layer1.0.downsample.1"
100 |     },
101 |     {
102 |       "layerStyle": "eltwise",
103 |       "layerName": "Add_14",
104 |       "inputName": [ "conv_10", "conv_12" ],
105 |       "eltType": "kSUM"
106 |     },
107 |     {
108 |       "layerStyle": "active",
109 |       "layerName": "relu_15",
110 |       "active_type": "relu",
111 |       "inputName": "Add_14"
112 |     },
113 |     {
114 |       "layerStyle": "resnet3",
115 |       "layerName": "resnet1_1",
116 |       "inputName": "relu_15",
117 |       "downsample": false,
118 |       "weightsFile": "layer1.1",
119 |       "parameter": {
120 |         "input_c": 256,
121 |         "temp_c": 64,
122 |         "output_c": 256
123 |       }
124 |     },
125 |     {
126 |       "layerStyle": "resnet3",
127 |       "layerName": "resnet1_2",
128 |       "inputName": "resnet1_1",
129 |       "downsample": false,
130 | 
131 |       "weightsFile": "layer1.2",
132 |       "parameter": {
133 |         "input_c": 256,
134 |         "temp_c": 64,
135 |         "output_c": 256
136 |       }
137 |     }, //resnet1_end
138 |     {
139 |       "layerStyle": "resnet3",
140 |       "layerName": "resnet2_0",
141 |       "inputName": "resnet1_2",
142 |       "downsample": true,
143 |       "weightsFile": "layer2.0",
144 |       "parameter": {
145 |         "input_c": 256,
146 |         "temp_c": 128,
147 |         "output_c": 512
148 |       }
149 |     },
150 |     {
151 |       "layerStyle": "resnet3",
152 |       "layerName": "resnet2_1",
153 |       "inputName": "resnet2_0",
154 |       "downsample": false,
155 |       "weightsFile": "layer2.1",
156 |       "parameter": {
157 |         "input_c": 512,
158 |         "temp_c": 128,
159 |         "output_c": 512
160 |       }
161 |     },
162 |     {
163 |       "layerStyle": "resnet3",
164 |       "layerName": "resnet2_2",
165 |       "inputName": "resnet2_1",
166 |       "downsample": false,
167 |       "weightsFile": "layer2.2",
168 |       "parameter": {
169 |         "input_c": 512,
170 |         "temp_c": 128,
171 |         "output_c": 512
172 |       }
173 |     },
174 |     {
175 |       "layerStyle": "resnet3",
176 |       "layerName": "resnet2_3",
177 |       "inputName": "resnet2_2",
178 |       "downsample": false,
179 | 
180 |       "weightsFile": "layer2.3",
181 |       "parameter": {
182 |         "input_c": 512,
183 |         "temp_c": 128,
184 |         "output_c": 512
185 |       }
186 |     }, //resnet2_end
187 |     {
188 |       "layerStyle": "resnet3",
189 |       "layerName": "resnet3_0",
190 |       "inputName": "resnet2_3",
191 |       "downsample": true,
192 |       "weightsFile": "layer3.0",
193 |       "parameter": {
194 |         "input_c": 512,
195 |         "temp_c": 256,
196 |         "output_c": 1024
197 |       }
198 |     },
199 |     {
200 |       "layerStyle": "resnet3",
201 |       "layerName": "resnet3_1",
202 |       "inputName": "resnet3_0",
203 |       "downsample": false,
204 |       "weightsFile": "layer3.1",
205 |       "parameter": {
206 |         "input_c": 1024,
207 |         "temp_c": 256,
208 |         "output_c": 1024
209 |       }
210 |     },
211 |     {
212 |       "layerStyle": "resnet3",
213 |       "layerName": "resnet3_2",
214 |       "inputName": "resnet3_1",
215 |       "downsample": false,
216 |       "weightsFile": "layer3.2",
217 |       "parameter": {
218 |         "input_c": 1024,
219 |         "temp_c": 256,
220 |         "output_c": 1024
221 |       }
222 |     },
223 |     {
224 |       "layerStyle": "resnet3",
225 |       "layerName": "resnet3_3",
226 |       "inputName": "resnet3_2",
227 |       "downsample": false,
228 |       "weightsFile": "layer3.3",
229 |       "parameter": {
230 |         "input_c": 1024,
231 |         "temp_c": 256,
232 |         "output_c": 1024
233 |       }
234 |     },
235 |     {
236 |       "layerStyle": "resnet3",
237 |       "layerName": "resnet3_4",
238 |       "inputName": "resnet3_3",
239 |       "downsample": false,
240 |       "weightsFile": "layer3.4",
241 |       "parameter": {
242 |         "input_c": 1024,
243 |         "temp_c": 256,
244 |         "output_c": 1024
245 |       }
246 |     },
247 |     {
248 |       "layerStyle": "resnet3",
249 |       "layerName": "resnet3_5",
250 |       "inputName": "resnet3_4",
251 |       "downsample": false,
252 | 
253 |       "weightsFile": "layer3.5",
254 |       "parameter": {
255 |         "input_c": 1024,
256 |         "temp_c": 256,
257 |         "output_c": 1024
258 |       }
259 |     }, //resnet3_end
260 |     {
261 |       "layerStyle": "resnet3",
262 |       "layerName": "resnet4_0",
263 |       "inputName": "resnet3_5",
264 |       "downsample": true,
265 |       "weightsFile": "layer4.0",
266 |       "parameter": {
267 |         "input_c": 1024,
268 |         "temp_c": 512,
269 |         "output_c": 2048
270 |       }
271 |     },
272 |     {
273 |       "layerStyle": "resnet3",
274 |       "layerName": "resnet4_1",
275 |       "inputName": "resnet4_0",
276 |       "downsample": false,
277 |       "weightsFile": "layer4.1",
278 |       "parameter": {
279 |         "input_c": 2048,
280 |         "temp_c": 512,
281 |         "output_c": 2048
282 |       }
283 |     },
284 |     {
285 |       "layerStyle": "resnet3",
286 |       "layerName": "resnet4_2",
287 |       "inputName": "resnet4_1",
288 |       "downsample": false,
289 |       "weightsFile": "layer4.2",
290 |       "parameter": {
291 |         "input_c": 2048,
292 |         "temp_c": 512,
293 |         "output_c": 2048
294 |       }
295 |     },
296 |     {
297 |       "layerStyle": "reduce",
298 |       "layerName": "avgPool_172",
299 |       "inputName": "resnet4_2",
300 |       "type": "kAVG",
301 |       
302 |       "axes": 6,
303 |       "keepD": true
304 |     },
305 |     {
306 |       "layerStyle": "fc",
307 |       "layerName": "gemm_173",
308 |       "inputName": "avgPool_172",
309 |       "outputName": "output",
310 |       "weightFile": "fc.weight",
311 |       "biasFile": "fc.bias",
312 |       "parameter": {
313 |         "input_c": 2048,
314 |         "output_c": 1000
315 |       }
316 |     }
317 | 
318 |   ]
319 | 
320 | }
321 | 


--------------------------------------------------------------------------------
/model/yolo/yolov5s.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "input_c": 3,
  3 |   "input_h": 640,
  4 |   "input_w": 640,
  5 |   "createENG": true,
  6 |   "ENGPath": "/mnt/f/LearningCode/LinuxCode/yolov5-4.0/yolov5.eng",
  7 |   "weightsDir": "/mnt/f/LearningCode/LinuxCode/yolov5-4.0/wts/",
  8 |   "fp16": true,
  9 |   "inputBlobName": "data",
 10 |   "outputBlobName": "prob",
 11 |   "outputSize": 1801,
 12 |   "maxBatchsize": 10,
 13 |   "doInfer": true,
 14 |   "BatchSize": 3, // valid range: [1, maxBatchsize]
 15 |   "imgDir": "/mnt/f/dataset/oneimg/",
 16 |   "imgType": "jpg",
 17 |   "network": [
 18 |     {
 19 |       "layerStyle": "preInput",
 20 |       "layerName": "m_s",
 21 |       "inputName": "data",
 22 |       "div_255": true
 23 |     },
 24 |     {
 25 |       "layerStyle": "focus",
 26 |       "layerName": "focus_0",
 27 |       "inputName": "m_s"
 28 |     },
 29 |     {
 30 |       "layerStyle": "cba",
 31 |       "layerName": "conv_1",
 32 |       "inputName": "focus_0",
 33 | 
 34 |       "convFile": "model.0.conv.conv",
 35 |       "parameter": {
 36 |         "output_c": 32,
 37 |         "kernel": [ 3, 3 ],
 38 |         "padding": [ 1, 1 ],
 39 |         "stride": [ 1, 1 ]
 40 |       },
 41 |       "bnFile": "model.0.conv.bn",
 42 |       "active_type": "silu",
 43 |       "eps": 1e-3
 44 |     },
 45 |     {
 46 |       "layerStyle": "cba",
 47 |       "layerName": "conv_2",
 48 |       "inputName": "conv_1",
 49 | 
 50 |       "convFile": "model.1.conv",
 51 |       "parameter": {
 52 |         "output_c": 64,
 53 |         "kernel": [ 3, 3 ],
 54 |         "padding": [ 1, 1 ],
 55 |         "stride": [ 2, 2 ]
 56 |       },
 57 |       "bnFile": "model.1.bn",
 58 |       "active_type": "silu",
 59 |       "eps": 1e-3
 60 |     },
 61 |     {
 62 |       "layerStyle": "C3",
 63 |       "layerName": "bottleneck_CSP2",
 64 |       "inputName": "conv_2",
 65 |       "c1": 64,
 66 |       "c2": 64,
 67 |       "n": 1,
 68 |       "shortCut": true,
 69 |       "g": 1,
 70 |       "e": 0.5,
 71 |       "lname": "model.2",
 72 |       "active_type": "silu",
 73 |       "eps": 1e-3
 74 |     },
 75 |     {
 76 |       "layerStyle": "cba",
 77 |       "layerName": "conv_3",
 78 |       "inputName": "bottleneck_CSP2",
 79 |       "convFile": "model.3.conv",
 80 |       "parameter": {
 81 |         "output_c": 128,
 82 |         "kernel": [ 3, 3 ],
 83 |         "padding": [ 1, 1 ],
 84 |         "stride": [ 2, 2 ]
 85 |       },
 86 |       "bnFile": "model.3.bn",
 87 |       "active_type": "silu",
 88 |       "eps": 1e-3
 89 |     },
 90 |     {
 91 |       "layerStyle": "C3",
 92 |       "layerName": "bottleneck_CSP4",
 93 |       "inputName": "conv_3",
 94 |       "c1": 128,
 95 |       "c2": 128,
 96 |       "n": 3,
 97 |       "shortCut": true,
 98 |       "g": 1,
 99 |       "e": 0.5,
100 |       "lname": "model.4",
101 |       "active_type": "silu",
102 |       "eps": 1e-3
103 |     },
104 |     {
105 |       "layerStyle": "cba",
106 |       "layerName": "conv_5",
107 |       "inputName": "bottleneck_CSP4",
108 |       "convFile": "model.5.conv",
109 |       "parameter": {
110 |         "output_c": 256,
111 |         "kernel": [ 3, 3 ],
112 |         "padding": [ 1, 1 ],
113 |         "stride": [ 2, 2 ]
114 |       },
115 |       "bnFile": "model.5.bn",
116 |       "active_type": "silu",
117 |       "eps": 1e-3
118 |     },
119 |     {
120 |       "layerStyle": "C3",
121 |       "layerName": "bottleneck_CSP6",
122 |       "inputName": "conv_5",
123 |       "c1": 256,
124 |       "c2": 256,
125 |       "n": 3,
126 |       "shortCut": true,
127 |       "e": 0.5,
128 |       "lname": "model.6",
129 |       "active_type": "silu",
130 |       "eps": 1e-3
131 |     },
132 |     {
133 |       "layerStyle": "cba",
134 |       "layerName": "conv_7",
135 |       "inputName": "bottleneck_CSP6",
136 |       "convFile": "model.7.conv",
137 |       "parameter": {
138 |         "output_c": 512,
139 |         "kernel": [ 3, 3 ],
140 |         "padding": [ 1, 1 ],
141 |         "stride": [ 2, 2 ]
142 |       },
143 |       "bnFile": "model.7.bn",
144 |       "active_type": "silu",
145 |       "eps": 1e-3
146 |     },
147 |     {
148 |       "layerStyle": "spp",
149 |       "layerName": "spp_8",
150 |       "inputName": "conv_7",
151 |       "lname": "model.8",
152 |       "c1": 512,
153 |       "c2": 512,
154 |       "kernels": [ 5, 9, 13 ],
155 |       "eps": 1e-3,
156 |       "active_type": "silu"
157 |     },
158 |     {
159 |       "layerStyle": "C3",
160 |       "layerName": "bottleneck_CSP9",
161 |       "inputName": "spp_8",
162 |       "c1": 512,
163 |       "c2": 512,
164 |       "n": 1,
165 |       "shortCut": false,
166 |       "g": 1,
167 |       "e": 0.5,
168 |       "lname": "model.9",
169 |       "active_type": "silu",
170 |       "eps": 1e-3
171 |     },
172 |     {
173 |       "layerStyle": "cba",
174 |       "layerName": "conv_10",
175 |       "inputName": "bottleneck_CSP9",
176 | 
177 |       "convFile": "model.10.conv",
178 |       "parameter": {
179 |         "output_c": 256,
180 |         "kernel": [ 1 ],
181 |         "padding": [ 0 ],
182 |         "stride": [ 1 ]
183 |       },
184 |       "bnFile": "model.10.bn",
185 |       "active_type": "silu",
186 |       "eps": 1e-3
187 |     },
188 |     {
189 |       "layerStyle": "upsample",
190 |       "layerName": "upsample_11",
191 | 
192 |       "mode": "nearest",
193 |       "inputName": "conv_10"
194 |     },
195 |     {
196 |       "layerStyle": "concat",
197 |       "layerName": "concat_12",
198 |       "inputName": [ "upsample_11", "bottleneck_CSP6" ],
199 |       "axis": 0
200 |     },
201 |     {
202 |       "layerStyle": "C3",
203 |       "layerName": "bottleneck_CSP13",
204 |       "inputName": "concat_12",
205 |       "c1": 512,
206 |       "c2": 256,
207 |       "n": 1,
208 |       "shortCut": false,
209 |       "g": 1,
210 |       "e": 0.5,
211 |       "lname": "model.13",
212 |       "active_type": "silu",
213 |       "eps": 1e-3
214 |     },
215 |     {
216 |       "layerStyle": "cba",
217 |       "layerName": "conv_14",
218 |       "inputName": "bottleneck_CSP13",
219 |       "convFile": "model.14.conv",
220 |       "parameter": {
221 |         "output_c": 128,
222 |         "kernel": [ 1 ],
223 |         "padding": [ 0 ],
224 |         "stride": [ 1 ]
225 |       },
226 |       "bnFile": "model.14.bn",
227 |       "active_type": "silu",
228 |       "eps": 1e-3
229 |     },
230 |     {
231 |       "layerStyle": "upsample",
232 |       "layerName": "upsample_15",
233 |       "mode": "nearest",
234 |       "inputName": "conv_14"
235 |     },
236 |     {
237 |       "layerStyle": "concat",
238 |       "layerName": "concat_16",
239 |       "inputName": [ "upsample_15", "bottleneck_CSP4" ],
240 |       "axis": 0
241 |     },
242 |     {
243 |       "layerStyle": "C3",
244 |       "layerName": "bottleneck_CSP17",
245 |       "inputName": "concat_16",
246 |       "c1": 256,
247 |       "c2": 128,
248 |       "n": 1,
249 |       "shortCut": false,
250 |       "g": 1,
251 |       "e": 0.5,
252 |       "lname": "model.17",
253 |       "active_type": "silu",
254 |       "eps": 1e-3
255 |     },
256 |     {
257 |       "layerStyle": "cba",
258 |       "layerName": "conv_18",
259 |       "inputName": "bottleneck_CSP17",
260 |       "convFile": "model.18.conv",
261 |       "parameter": {
262 |         "output_c": 128,
263 |         "kernel": [ 3 ],
264 |         "padding": [ 1 ],
265 |         "stride": [ 2 ]
266 |       },
267 |       "bnFile": "model.18.bn",
268 |       "active_type": "silu",
269 |       "eps": 1e-3
270 |     },
271 |     {
272 |       "layerStyle": "concat",
273 |       "layerName": "concat_19",
274 |       "inputName": [ "conv_18", "conv_14" ],
275 |       "axis": 0
276 |     },
277 |     {
278 |       "layerStyle": "C3",
279 |       "layerName": "bottleneck_CSP20",
280 |       "inputName": "concat_19",
281 |       "c1": 256,
282 |       "c2": 256,
283 |       "n": 1,
284 |       "shortCut": false,
285 |       "g": 1,
286 |       "e": 0.5,
287 |       "lname": "model.20",
288 |       "active_type": "silu",
289 |       "eps": 1e-3
290 |     },
291 |     {
292 |       "layerStyle": "cba",
293 |       "layerName": "conv_21",
294 |       "inputName": "bottleneck_CSP20",
295 |       "convFile": "model.21.conv",
296 |       "parameter": {
297 |         "output_c": 256,
298 |         "kernel": [ 3 ],
299 |         "padding": [ 1 ],
300 |         "stride": [ 2 ]
301 |       },
302 |       "bnFile": "model.21.bn",
303 |       "active_type": "silu",
304 |       "eps": 1e-3
305 |     },
306 |     {
307 |       "layerStyle": "concat",
308 |       "layerName": "concat_22",
309 |       "inputName": [ "conv_21", "conv_10" ],
310 |       "axis": 0
311 |     },
312 |     {
313 |       "layerStyle": "C3",
314 |       "layerName": "bottleneck_CSP23",
315 |       "inputName": "concat_22",
316 |       "c1": 512,
317 |       "c2": 512,
318 |       "n": 1,
319 |       "shortCut": false,
320 |       "g": 1,
321 |       "e": 0.5,
322 |       "lname": "model.23",
323 |       "active_type": "silu",
324 |       "eps": 1e-3
325 |     },
326 |     {
327 |       "layerStyle": "conv",
328 |       "layerName": "det_0",
329 |       "inputName": "bottleneck_CSP17",
330 |       "weightFile": "model.24.m.0",
331 |       "biasFile": "model.24.m.0",
332 | 
333 |       "parameter": {
334 |         "output_c": 255, // output_c = 3 * (cls_num + 5)
335 |         "kernel": [ 1 ]
336 |       }
337 |     },
338 |     {
339 |       "layerStyle": "conv",
340 |       "layerName": "det_1",
341 |       "inputName": "bottleneck_CSP20",
342 |       "weightFile": "model.24.m.1",
343 |       "biasFile": "model.24.m.1",
344 | 
345 |       "parameter": {
346 |         "output_c": 255, // output_c = 3 * (cls_num + 5)
347 |         "kernel": [ 1 ]
348 |       }
349 |     },
350 |     {
351 |       "layerStyle": "conv",
352 |       "layerName": "det_2",
353 |       "inputName": "bottleneck_CSP23",
354 |       "weightFile": "model.24.m.2",
355 |       "biasFile": "model.24.m.2",
356 | 
357 |       "parameter": {
358 |         "output_c": 255, // output_c = 3 * (cls_num + 5)
359 |         "kernel": [ 1 ]
360 |       }
361 |     },
362 |     {
363 |       "layerStyle": "yolo",
364 |       "layerName": "YOLO",
365 |       "outputName": "prob",
366 |       "inputName": [ "det_0", "det_1", "det_2" ],
367 |       "anchor_grid": "model.24.anchor_grid",
368 |       //"Anchor": [ 10  ,13  ,16  ,30  ,33  ,23  ,30  ,61  ,62  ,45  ,59  ,119  ,116  ,90  ,156  ,198  ,373  ,326  ],
369 |       "cls_num": 80,
370 |       "max_box": 300 //outputSize = 300 * (4 + 1 + 1) +1 =1801  ,4:(x1,y1,x2,y2); 1(conf) ;1 (cls)
371 |     }
372 |   ]
373 | 
374 | }
375 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt1:
--------------------------------------------------------------------------------
# Oldest CMake release this directory is written against.
cmake_minimum_required(VERSION 3.2)

# Collect the library sources into TRT_FILES: the headers and C++ files that
# sit next to this file, plus the CUDA / C++ plugin sources under plugin/.
# The glob is evaluated when CMake configures, so re-run CMake after adding or
# removing a source file.
file(GLOB TRT_FILES
  "*.h"
  "*.cpp"
  "./plugin/*.cu"
  "./plugin/*.h"
  "./plugin/*.cpp"
)
4 | 


--------------------------------------------------------------------------------
/src/calibrator.cpp:
--------------------------------------------------------------------------------
  1 | #include <iterator>
  2 | #include "calibrator.h"
  3 | #include "utils.h"
  4 | 
  5 | 
  6 | vector<string> loadImages(const string imgTxt)
  7 | {
  8 |     vector<string> imgInfo;
  9 |     FILE *f = fopen(imgTxt.c_str(),"r");
 10 |     if (!f){
 11 |          perror("Error");
 12 |         cout<<"cant open file"<<imgTxt<<endl;
 13 |         return imgInfo;
 14 |     }
 15 |     char str[512];
 16 |     while (fgets(str,512,f)!=NULL)
 17 |     {
 18 |         for (int i = 0;str[i] != '\0';++i) {
 19 |             if (str[i] == '\r')
 20 |             {
 21 |                 str[i] = '\0';
 22 |             }
 23 |             if (str[i] == '\n')
 24 |             {
 25 |                 str[i] = '\0';
 26 |                 break;
 27 |             }
 28 |         }
 29 |         imgInfo.push_back(str);
 30 |     }
 31 |     fclose(f);
 32 |     return imgInfo;
 33 | }
 34 | 
 35 | 
 36 | calibrator::calibrator(const unsigned int &batchsize,
 37 |                        const string &caliTxt,
 38 |                        const string &calibratorPath,
 39 |                        const unsigned int &inputC,
 40 |                        const unsigned int &inputH,
 41 |                        const unsigned int &inputW,
 42 |                        const string &inputName,
 43 |                        vector<float> Mean,
 44 |                        vector<float> Std,
 45 |                        bool isDiv255):m_batchsize(batchsize),
 46 |                                        m_inputC(inputC),
 47 |                                        m_inputH(inputH),
 48 |                                        m_inputW(inputW),                                       
 49 |                                        m_InputCount(batchsize * inputC * inputH * inputW),
 50 |                                        m_inputName(inputName.c_str()),
 51 |                                        m_calibratorPath(calibratorPath),
 52 |                                        m_ImageIndex(0)
 53 | {
 54 |     m_ImageList = loadImages(caliTxt);
 55 |     m_mean = Mean;
 56 |     m_std = Std;
 57 | 
 58 |     div_255 = isDiv255;
 59 | 
 60 |     cudaMalloc(&m_CudaInput,m_InputCount*sizeof (float));
 61 | 
 62 | }
 63 | int calibrator::getBatchSize() const
 64 | {
 65 |     return m_batchsize;
 66 | }
 67 | bool calibrator::getBatch(void **bindings, const char **names, int nbBindings)
 68 | {
 69 | 
 70 |     if(m_ImageIndex + m_batchsize > m_ImageList.size()){
 71 |         return false;
 72 |     }
 73 |     int flag = 0;
 74 |     cv::Scalar Mean;
 75 |     cv::Scalar Std;
 76 | 
 77 |     if(m_inputC == 3)
 78 |     {
 79 |         flag = 1;
 80 |         Mean = cv::Scalar(m_mean[0], m_mean[1], m_mean[2]);
 81 |         Std = cv::Scalar(m_std[0], m_std[1], m_std[2]);
 82 |     }
 83 |     else if(m_inputC == 1){
 84 |         Mean = cv::Scalar(m_mean[0]);
 85 |         Std = cv::Scalar(m_std[0]);
 86 |     }
 87 |     else {
 88 |         cout<<"not support "<<m_inputC<<" channels"<<endl;
 89 |     }
 90 |     vector<cv::Mat> InputImgs;
 91 |     for (unsigned int i = m_ImageIndex; i < m_ImageIndex + m_batchsize;i++) {
 92 |         string imgPath = m_ImageList.at(i);
 93 |         cout<<imgPath<<endl;
 94 |         cv::Mat temp = cv::imread(imgPath,flag);
 95 |         if(temp.empty()){
 96 |             cout<<imgPath<<" is not a image!"<<endl;
 97 |         }
 98 |         cv::Mat img ;
 99 |         if(div_255)
100 |         {
101 |             temp.convertTo(img,CV_32FC1,1.0/255.0);
102 |             InputImgs.push_back(img);
103 |         }
104 |         else {
105 |             InputImgs.push_back(temp);
106 |         }
107 |     }
108 |     m_ImageIndex += m_batchsize;
109 |     cv::Mat trtInput = BlobFromImages(InputImgs,cv::Size(m_inputH,m_inputW),
110 |                                        Mean,Std,
111 |                                        true,false);
112 | //    cv::Mat trtInput = cv::dnn::blobFromImages(InputImgs,1.0,cv::Size(m_inputH,m_inputW),Mean,true,false);
113 |     cudaMemcpy(m_CudaInput,trtInput.ptr<float>(0),m_InputCount*sizeof (float),cudaMemcpyHostToDevice);
114 | 
115 |     bindings[0] = m_CudaInput;
116 |     return true;
117 | }
118 | const void *calibrator::readCalibrationCache(size_t &length)
119 | {
120 |     void *output;
121 |     m_CalibrationCache.clear();
122 |     ifstream input(m_calibratorPath,ios::binary);
123 |     input >> noskipws;
124 |     if (input.good())
125 |     {
126 |         copy(istream_iterator<char>(input),istream_iterator<char>(),back_inserter(m_CalibrationCache));
127 |     }
128 |     length = m_CalibrationCache.size();
129 | 
130 |     if(length){
131 |         std::cout << "Using cached calibration table to build the engine " << std::endl;
132 |         output = &m_CalibrationCache[0];
133 |     }
134 |     else {
135 |         std::cout << "New calibration table will be created to build the engine" << std::endl;
136 |         output = nullptr;
137 |     }
138 | 
139 |     return output;
140 | }
141 | void calibrator::writeCalibrationCache(const void *ptr, std::size_t length)
142 | {
143 |     assert(!m_calibratorPath.empty());
144 |     cout<<"length =  "<<length<<endl;
145 |     ofstream output(m_calibratorPath,ios::binary);
146 |     output.write(reinterpret_cast<const char*>(ptr),length);
147 |     output.close();
148 | }
149 | 


--------------------------------------------------------------------------------
/src/calibrator.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef CALIBRATOR_H
 3 | #define CALIBRATOR_H
 4 | #include <fstream>
 5 | #include <NvInfer.h>
 6 | #include <cuda.h>
 7 | #include <string>
 8 | #include <iostream>
 9 | #include <vector>
10 | #include <cuda_runtime_api.h>
11 | 
12 | using namespace std;
13 | using namespace nvinfer1;
14 | 
15 | class calibrator : public nvinfer1::IInt8EntropyCalibrator2
16 | {
17 | public:
18 |     calibrator(const unsigned int &batchsize,
19 |                const string &caliTxt,
20 |                const string &calibratorPath,
21 |                const unsigned int &inputC,
22 |                const unsigned int &inputH,
23 |                const unsigned int &inputW,
24 |                const string &inputName,
25 |                vector<float> Mean,
26 |                vector<float> Std,
27 |                bool isDiv255);
28 | 
29 |     int getBatchSize() const override;
30 |     bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
31 |     const void* readCalibrationCache(size_t& length) override;
32 |     void writeCalibrationCache(const void* ptr, std::size_t length) override;
33 | 
34 | private:
35 |     unsigned int m_batchsize;
36 |     const unsigned int m_inputC;
37 |     const unsigned int m_inputH;
38 |     const unsigned int m_inputW;
39 |     vector<float> m_mean;
40 |     vector<float> m_std;
41 |     //const uint64_t m_inputSize;
42 |     const uint64_t m_InputCount;
43 |     const char *m_inputName;
44 |     const string m_calibratorPath{nullptr};
45 |     vector<string> m_ImageList;
46 |     void *m_CudaInput{nullptr};
47 |     vector<char> m_CalibrationCache;
48 |     unsigned int m_ImageIndex;
49 |     bool div_255;
50 | 
51 | };
52 | 
53 | #endif // CALIBRATOR_H
54 | 


--------------------------------------------------------------------------------
/src/json-forwards.h:
--------------------------------------------------------------------------------
  1 | /// Json-cpp amalgamated forward header (http://jsoncpp.sourceforge.net/).
  2 | /// It is intended to be used with #include "json/json-forwards.h"
  3 | /// This header provides forward declaration for all JsonCpp types.
  4 | 
  5 | // //////////////////////////////////////////////////////////////////////
  6 | // Beginning of content of file: LICENSE
  7 | // //////////////////////////////////////////////////////////////////////
  8 | 
  9 | /*
 10 | The JsonCpp library's source code, including accompanying documentation, 
 11 | tests and demonstration applications, are licensed under the following
 12 | conditions...
 13 | 
 14 | Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all 
 15 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 
 16 | this software is released into the Public Domain.
 17 | 
 18 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
 19 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
 20 | The JsonCpp Authors, and is released under the terms of the MIT License (see below).
 21 | 
 22 | In jurisdictions which recognize Public Domain property, the user of this 
 23 | software may choose to accept it either as 1) Public Domain, 2) under the 
 24 | conditions of the MIT License (see below), or 3) under the terms of dual 
 25 | Public Domain/MIT License conditions described here, as they choose.
 26 | 
 27 | The MIT License is about as close to Public Domain as a license can get, and is
 28 | described in clear, concise terms at:
 29 | 
 30 |    http://en.wikipedia.org/wiki/MIT_License
 31 |    
 32 | The full text of the MIT License follows:
 33 | 
 34 | ========================================================================
 35 | Copyright (c) 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 36 | 
 37 | Permission is hereby granted, free of charge, to any person
 38 | obtaining a copy of this software and associated documentation
 39 | files (the "Software"), to deal in the Software without
 40 | restriction, including without limitation the rights to use, copy,
 41 | modify, merge, publish, distribute, sublicense, and/or sell copies
 42 | of the Software, and to permit persons to whom the Software is
 43 | furnished to do so, subject to the following conditions:
 44 | 
 45 | The above copyright notice and this permission notice shall be
 46 | included in all copies or substantial portions of the Software.
 47 | 
 48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 49 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 50 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 51 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 52 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 53 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 54 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 55 | SOFTWARE.
 56 | ========================================================================
 57 | (END LICENSE TEXT)
 58 | 
 59 | The MIT license is compatible with both the GPL and commercial
 60 | software, affording one all of the rights of Public Domain with the
 61 | minor nuisance of being required to keep the above copyright notice
 62 | and license text in the source code. Note also that by accepting the
 63 | Public Domain "license" you can re-license your copy using whatever
 64 | license you like.
 65 | 
 66 | */
 67 | 
 68 | // //////////////////////////////////////////////////////////////////////
 69 | // End of content of file: LICENSE
 70 | // //////////////////////////////////////////////////////////////////////
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | #ifndef JSON_FORWARD_AMALGAMATED_H_INCLUDED
 77 | # define JSON_FORWARD_AMALGAMATED_H_INCLUDED
 78 | /// If defined, indicates that the source file is amalgamated
 79 | /// to prevent private header inclusion.
 80 | #define JSON_IS_AMALGAMATION
 81 | 
 82 | // //////////////////////////////////////////////////////////////////////
 83 | // Beginning of content of file: include/json/config.h
 84 | // //////////////////////////////////////////////////////////////////////
 85 | 
 86 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 87 | // Distributed under MIT license, or public domain if desired and
 88 | // recognized in your jurisdiction.
 89 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 90 | 
 91 | #ifndef JSON_CONFIG_H_INCLUDED
 92 | #define JSON_CONFIG_H_INCLUDED
 93 | #include <cstddef>
 94 | #include <cstdint>
 95 | #include <istream>
 96 | #include <memory>
 97 | #include <ostream>
 98 | #include <sstream>
 99 | #include <string>
100 | #include <type_traits>
101 | 
102 | /// If defined, indicates that json library is embedded in CppTL library.
103 | //# define JSON_IN_CPPTL 1
104 | 
105 | /// If defined, indicates that json may leverage CppTL library
106 | //#  define JSON_USE_CPPTL 1
107 | /// If defined, indicates that cpptl vector based map should be used instead of
108 | /// std::map
109 | /// as Value container.
110 | //#  define JSON_USE_CPPTL_SMALLMAP 1
111 | 
112 | // If non-zero, the library uses exceptions to report bad input instead of C
113 | // assertion macros. The default is to use exceptions.
114 | #ifndef JSON_USE_EXCEPTION
115 | #define JSON_USE_EXCEPTION 1
116 | #endif
117 | 
118 | // Temporary, tracked for removal with issue #982.
119 | #ifndef JSON_USE_NULLREF
120 | #define JSON_USE_NULLREF 1
121 | #endif
122 | 
123 | /// If defined, indicates that the source file is amalgamated
124 | /// to prevent private header inclusion.
125 | /// Remarks: it is automatically defined in the generated amalgamated header.
126 | // #define JSON_IS_AMALGAMATION
127 | 
128 | #ifdef JSON_IN_CPPTL
129 | #include <cpptl/config.h>
130 | #ifndef JSON_USE_CPPTL
131 | #define JSON_USE_CPPTL 1
132 | #endif
133 | #endif
134 | 
135 | #ifdef JSON_IN_CPPTL
136 | #define JSON_API CPPTL_API
137 | #elif defined(JSON_DLL_BUILD)
138 | #if defined(_MSC_VER) || defined(__MINGW32__)
139 | #define JSON_API __declspec(dllexport)
140 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING
141 | #elif defined(__GNUC__) || defined(__clang__)
142 | #define JSON_API __attribute__((visibility("default")))
143 | #endif // if defined(_MSC_VER)
144 | #elif defined(JSON_DLL)
145 | #if defined(_MSC_VER) || defined(__MINGW32__)
146 | #define JSON_API __declspec(dllimport)
147 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING
148 | #endif // if defined(_MSC_VER)
149 | #endif // ifdef JSON_IN_CPPTL
150 | #if !defined(JSON_API)
151 | #define JSON_API
152 | #endif
153 | 
154 | #if defined(_MSC_VER) && _MSC_VER < 1800
155 | #error                                                                         \
156 |     "ERROR:  Visual Studio 12 (2013) with _MSC_VER=1800 is the oldest supported compiler with sufficient C++11 capabilities"
157 | #endif
158 | 
159 | #if defined(_MSC_VER) && _MSC_VER < 1900
160 | // As recommended at
161 | // https://stackoverflow.com/questions/2915672/snprintf-and-visual-studio-2010
162 | extern JSON_API int
163 | msvc_pre1900_c99_snprintf(char* outBuf, size_t size, const char* format, ...);
164 | #define jsoncpp_snprintf msvc_pre1900_c99_snprintf
165 | #else
166 | #define jsoncpp_snprintf std::snprintf
167 | #endif
168 | 
169 | // If JSON_NO_INT64 is defined, then Json only support C++ "int" type for
170 | // integer
171 | // Storages, and 64 bits integer support is disabled.
172 | // #define JSON_NO_INT64 1
173 | 
174 | // JSONCPP_OVERRIDE is maintained for backwards compatibility of external tools.
175 | // C++11 should be used directly in JSONCPP.
176 | #define JSONCPP_OVERRIDE override
177 | 
178 | #if __cplusplus >= 201103L
179 | #define JSONCPP_NOEXCEPT noexcept
180 | #define JSONCPP_OP_EXPLICIT explicit
181 | #elif defined(_MSC_VER) && _MSC_VER < 1900
182 | #define JSONCPP_NOEXCEPT throw()
183 | #define JSONCPP_OP_EXPLICIT explicit
184 | #elif defined(_MSC_VER) && _MSC_VER >= 1900
185 | #define JSONCPP_NOEXCEPT noexcept
186 | #define JSONCPP_OP_EXPLICIT explicit
187 | #else
188 | #define JSONCPP_NOEXCEPT throw()
189 | #define JSONCPP_OP_EXPLICIT
190 | #endif
191 | 
192 | #if defined(__clang__)
193 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1
194 | #elif defined(__GNUC__) && (__GNUC__ >= 6)
195 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1
196 | #endif
197 | 
198 | #if !defined(JSON_IS_AMALGAMATION)
199 | 
200 | #include "allocator.h"
201 | #include "version.h"
202 | 
203 | #endif // if !defined(JSON_IS_AMALGAMATION)
204 | 
205 | namespace Json {
206 | typedef int Int;
207 | typedef unsigned int UInt;
208 | #if defined(JSON_NO_INT64)
209 | typedef int LargestInt;
210 | typedef unsigned int LargestUInt;
211 | #undef JSON_HAS_INT64
212 | #else                 // if defined(JSON_NO_INT64)
213 | // For Microsoft Visual use specific types as long long is not supported
214 | #if defined(_MSC_VER) // Microsoft Visual Studio
215 | typedef __int64 Int64;
216 | typedef unsigned __int64 UInt64;
217 | #else                 // if defined(_MSC_VER) // Other platforms, use long long
218 | typedef int64_t Int64;
219 | typedef uint64_t UInt64;
220 | #endif                // if defined(_MSC_VER)
221 | typedef Int64 LargestInt;
222 | typedef UInt64 LargestUInt;
223 | #define JSON_HAS_INT64
224 | #endif // if defined(JSON_NO_INT64)
225 | 
226 | template <typename T>
227 | using Allocator = typename std::conditional<JSONCPP_USING_SECURE_MEMORY,
228 |                                             SecureAllocator<T>,
229 |                                             std::allocator<T>>::type;
230 | using String = std::basic_string<char, std::char_traits<char>, Allocator<char>>;
231 | using IStringStream = std::basic_istringstream<String::value_type,
232 |                                                String::traits_type,
233 |                                                String::allocator_type>;
234 | using OStringStream = std::basic_ostringstream<String::value_type,
235 |                                                String::traits_type,
236 |                                                String::allocator_type>;
237 | using IStream = std::istream;
238 | using OStream = std::ostream;
239 | } // namespace Json
240 | 
241 | // Legacy names (formerly macros).
242 | using JSONCPP_STRING = Json::String;
243 | using JSONCPP_ISTRINGSTREAM = Json::IStringStream;
244 | using JSONCPP_OSTRINGSTREAM = Json::OStringStream;
245 | using JSONCPP_ISTREAM = Json::IStream;
246 | using JSONCPP_OSTREAM = Json::OStream;
247 | 
248 | #endif // JSON_CONFIG_H_INCLUDED
249 | 
250 | // //////////////////////////////////////////////////////////////////////
251 | // End of content of file: include/json/config.h
252 | // //////////////////////////////////////////////////////////////////////
253 | 
254 | 
255 | 
256 | 
257 | 
258 | 
259 | // //////////////////////////////////////////////////////////////////////
260 | // Beginning of content of file: include/json/forwards.h
261 | // //////////////////////////////////////////////////////////////////////
262 | 
263 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
264 | // Distributed under MIT license, or public domain if desired and
265 | // recognized in your jurisdiction.
266 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
267 | 
268 | #ifndef JSON_FORWARDS_H_INCLUDED
269 | #define JSON_FORWARDS_H_INCLUDED
270 | 
271 | #if !defined(JSON_IS_AMALGAMATION)
272 | #include "config.h"
273 | #endif // if !defined(JSON_IS_AMALGAMATION)
274 | 
namespace Json {

// Forward declarations, grouped by the header that defines each type.

// writer.h
class StreamWriter;
class StreamWriterBuilder;
class Writer;
class FastWriter;
class StyledWriter;
class StyledStreamWriter;

// reader.h
class Reader;
class CharReader;
class CharReaderBuilder;

// json_features.h
class Features;

// value.h
using ArrayIndex = unsigned int;
class StaticString;
class Path;
class PathArgument;
class Value;
class ValueIteratorBase;
class ValueIterator;
class ValueConstIterator;

} // namespace Json
304 | 
305 | #endif // JSON_FORWARDS_H_INCLUDED
306 | 
307 | // //////////////////////////////////////////////////////////////////////
308 | // End of content of file: include/json/forwards.h
309 | // //////////////////////////////////////////////////////////////////////
310 | 
311 | 
312 | 
313 | 
314 | 
315 | #endif //ifndef JSON_FORWARD_AMALGAMATED_H_INCLUDED
316 | 


--------------------------------------------------------------------------------
/src/plugin/UpsampleKernel.cu:
--------------------------------------------------------------------------------
  1 | #include "UpsmapleKernel.h"
  2 | 
  3 | 
  4 | /**
  5 |  * @brief caculate the number of cuda kernel for upsample. (Cite from: 《GPU高性能编程CUDA实战》P46,P47)
  6 |  * 
  7 |  * @param total_thread_num: the number of cuda thread of you want to used for upsample
  8 |  * @param max_thread_num: the gpu device property
  9 |  * @return int  the number of cuda kernel for upsample
 10 |  */
 11 | int get_kernel_num(int total_thread_num, int max_thread_num)
 12 | {
 13 |     return (total_thread_num + max_thread_num - 1)/max_thread_num;
 14 | }
 15 | 
 16 | int get_max_thread_num()
 17 | {
 18 |     cudaDeviceProp prop;
 19 |     cudaGetDeviceProperties(&prop, 0);
 20 |     return prop.maxThreadsPerBlock;
 21 | }
 22 | 
 23 | __host__ __forceinline__ float linear_upsampling_compute_scale(int input_size, int output_size)
 24 | {
 25 |     return float(input_size)/float(output_size) ;
 26 | }
 27 | 
 28 | __device__ __forceinline__ float linear_upsampling_compute_source_index(float scale, int dst_index, int intput_size)
 29 | {
 30 |     float src_idx = scale * (dst_index + 0.5)-0.5;
 31 |     return (src_idx>=0) ? src_idx : 0;
 32 | }
 33 | 
 34 | 
 35 | __device__ __forceinline__ int get_index(const int batch_idx, const int channel_idx, const int height_idx, const int width_idx, 
 36 |                 const int batch_total, const int channel_total, const int width)
 37 | {
 38 |     int ret_idx = batch_idx * batch_total
 39 |                     + channel_idx * channel_total
 40 |                     + height_idx * width
 41 |                     + width_idx;
 42 |     return ret_idx;
 43 | }
 44 | 
 45 | /**
 46 |  * @brief 
 47 |  * 
 48 |  * @tparam T 
 49 |  * @param n 
 50 |  * @param input_shape: input data shape. such as [batch, channel, height, width] 
 51 |  * @param rate_h 
 52 |  * @param rate_w 
 53 |  * @param inputs 
 54 |  * @param outputs 
 55 |  * @return __global__ BilinearKernel 
 56 |  * @TODO: 
 57 |  *  
 58 |  */
 59 | 
 60 | 
 61 | template <typename T>
 62 | __global__ void BilinearKernel(
 63 |         const int n,
 64 |         int input_b,
 65 |         int input_c,
 66 |         int input_h,
 67 |         int input_w,
 68 |         int output_h,
 69 |         int output_w,
 70 |         const float rate_h,
 71 |         const float rate_w,
 72 |         const T* inputs,
 73 |         T* outputs)
 74 | {
 75 | 
 76 |     int index = threadIdx.x + blockIdx.x * blockDim.x;
 77 |     if(index < n)
 78 |     {
 79 |         const int w2 = index % output_w;
 80 |         const int h2 = index / output_w;
 81 | 
 82 | 
 83 |         const float h1r = linear_upsampling_compute_source_index(rate_h, h2, input_h);
 84 |         const int h1 = int(h1r);
 85 |         const int h1p = (h1 < input_h - 1) ? 1 : 0;
 86 |         const float h1lambda = h1r - h1;
 87 |         const float h0lambda = 1 - h1lambda;
 88 | 
 89 |         const float w1r = linear_upsampling_compute_source_index(rate_w, w2, input_w);
 90 |         const int w1 = int(w1r);
 91 |         const int w1p = (w1 < input_w - 1) ? 1 : 0;
 92 |         const float w1lambda = w1r - w1;
 93 |         const float w0lambda = 1 - w1lambda;
 94 | 
 95 |         int s_batch_total_1 = input_c * input_h * input_w;
 96 |         int s_channel_total_1 = input_h * input_w;
 97 | 
 98 |         int s_batch_total_2 = input_c * output_h * output_w;
 99 |         int s_channel_total_2 = output_h * output_w;
100 | 
101 | 
102 |         const int batch_size = input_b;
103 |         const int channel_size = input_c;
104 | 
105 |         for(int b_idx=0; b_idx<batch_size; b_idx++)
106 |         {
107 |             for(int c=0; c<channel_size; c++)
108 |             {
109 |                 const T val = h0lambda * (w0lambda * inputs[get_index(b_idx, c, h1, w1, s_batch_total_1, s_channel_total_1, input_w)]
110 |                                     + w1lambda * inputs[get_index(b_idx, c, h1, w1+w1p, s_batch_total_1, s_channel_total_1, input_w)])
111 |                                     + h1lambda * (w0lambda * inputs[get_index(b_idx, c, h1+h1p, w1, s_batch_total_1, s_channel_total_1, input_w)]
112 |                                     + w1lambda * inputs[get_index(b_idx, c, h1+h1p, w1+w1p, s_batch_total_1, s_channel_total_1, input_w)]);
113 |                 outputs[get_index(b_idx, c, h2, w2, s_batch_total_2, s_channel_total_2, output_w)] = val;
114 |                 
115 |             }
116 |         }
117 |     }
118 | }
119 | 
120 | 
121 | int UpsampleInference(
122 |     cudaStream_t stream,
123 |     int n,
124 |     int input_b,
125 |     int input_c,
126 |     int input_h,
127 |     int input_w,
128 |     float scale_h,
129 |     float scale_w,
130 |     const void* inputs,
131 |     void* outputs)
132 | {
133 |     int output_h = int(input_h * scale_h);
134 |     int output_w = int(input_w * scale_w);
135 |     int max_threads = get_max_thread_num();
136 |     int kernel_num = get_kernel_num(n, max_threads);
137 |     float rate_h = linear_upsampling_compute_scale(input_h, output_h);
138 |     float rate_w = linear_upsampling_compute_scale(input_w, output_w);
139 | 
140 |     BilinearKernel<float><<< kernel_num, max_threads, 0, stream>>>(n,input_b,input_c,input_h,input_w,
141 |                                                                                     output_h, output_w, 
142 |                                                                                     rate_h, rate_w,
143 |                                                                                     static_cast<const float*>(inputs),
144 |                                                                                     static_cast<float*>(outputs));
145 |     return 0;
146 | }
147 | 


--------------------------------------------------------------------------------
/src/plugin/UpsamplePlugin.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include "UpsmapleKernel.h"
  3 | #include "UpsamplePlugin.h"
  4 | 
  5 | #include<cassert>
  6 | #include <cstring>
  7 | 
  8 | using namespace nvinfer1;
  9 | 
 10 | // Upsample plugin specific constants
 11 | namespace {
 12 |     static const char* UPSAMPLE_PLUGIN_VERSION{"1"};
 13 |     static const char* UPSAMPLE_PLUGIN_NAME{"UpsamplePlugin"};
 14 | }
 15 | 
 16 | // Static class fields initialization
 17 | PluginFieldCollection UpsamplePluginCreator::mFC{};
 18 | std::vector<PluginField> UpsamplePluginCreator::mPluginAttributes;
 19 | 
 20 | REGISTER_TENSORRT_PLUGIN(UpsamplePluginCreator);
 21 | 
 22 | template<typename T>
 23 | void writeToBuffer(char*& buffer, const T& val)
 24 | {
 25 |     *reinterpret_cast<T*>(buffer) = val;
 26 |     buffer += sizeof(T);
 27 | }
 28 | 
 29 | // Helper function for deserializing plugin
 30 | template<typename T>
 31 | T readFromBuffer(const char*& buffer)
 32 | {
 33 |     T val = *reinterpret_cast<const T*>(buffer);
 34 |     buffer += sizeof(T);
 35 |     return val;
 36 | }
 37 | 
 38 | UpsamplePlugin::UpsamplePlugin(const std::string name, float scale_h, float scale_w)
 39 |     : mLayerName(name)
 40 |     , mScaleFactor_h(scale_h)
 41 |     , mScaleFactor_w(scale_w)
 42 | {
 43 |     mInputShape.c() = -1;
 44 |     mInputShape.h() = -1;
 45 |     mInputShape.w() = -1;
 46 |     mInputVolume = 0;
 47 | }
 48 | 
 49 | UpsamplePlugin::UpsamplePlugin(const std::string name, const void* data, size_t length)
 50 |     : mLayerName(name)
 51 | {
 52 |     const char *d = static_cast<const char *>(data);
 53 |     const char *a = d;
 54 | 
 55 |     mScaleFactor_h = readFromBuffer<float>(d);
 56 |     mScaleFactor_w = readFromBuffer<float>(d);
 57 |     mInputVolume = readFromBuffer<size_t>(d);
 58 |     mInputShape.c() = readFromBuffer<int>(d);
 59 |     mInputShape.h() = readFromBuffer<int>(d);
 60 |     mInputShape.w() = readFromBuffer<int>(d);
 61 | 
 62 |     assert(d == (a + length));
 63 | 
 64 | }
 65 | 
 66 | const char* UpsamplePlugin::getPluginType() const
 67 | {
 68 |     return UPSAMPLE_PLUGIN_NAME;
 69 | }
 70 | 
 71 | const char* UpsamplePlugin::getPluginVersion() const
 72 | {
 73 |     return UPSAMPLE_PLUGIN_VERSION;
 74 | }
 75 | 
 76 | int UpsamplePlugin::getNbOutputs() const
 77 | {
 78 |     return 1;
 79 | }
 80 | 
 81 | Dims UpsamplePlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
 82 | {
 83 |     assert(index == 0);
 84 |     assert(nbInputDims == 1);
 85 |     assert(inputs[0].nbDims == 3);
 86 |     return nvinfer1::DimsCHW{inputs[0].d[0],int(inputs[0].d[1]*mScaleFactor_h), int(inputs[0].d[2]*mScaleFactor_w)};
 87 | }
 88 | 
 89 | int UpsamplePlugin::initialize()
 90 | {
 91 |     //printf("UpsamplePlugin::initialize\n");
 92 |     return 0;
 93 | }
 94 | 
 95 | 
 96 | int UpsamplePlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream)
 97 | {
 98 |     //printf("UpsamplePlugin::enqueue\n");
 99 |     int status = -1;
100 | 
101 |     // Our plugin outputs only one tensor
102 |     void* output = outputs[0];
103 | 
104 |     // Launch CUDA kernel wrapper and save its return value
105 |     status = UpsampleInference(stream, mInputVolume, 
106 |                                 batchSize, mInputShape.c(), mInputShape.h(), mInputShape.w(),
107 |                                 mScaleFactor_h,mScaleFactor_w,
108 |                                 inputs[0], output);
109 |     return status;
110 | }
111 | 
112 | size_t UpsamplePlugin::getSerializationSize() const
113 | {
114 |     //printf("UpsamplePlugin::getSerializationSize\n");
115 |     return sizeof(mScaleFactor_h)  + sizeof(mScaleFactor_w) +
116 |             sizeof(mInputVolume) + sizeof(mInputShape.c()) + 
117 |             sizeof(mInputShape.h()) + sizeof(mInputShape.w());
118 | }
119 | 
120 | 
121 | void UpsamplePlugin::serialize(void* buffer) const 
122 | {
123 |     //printf("UpsamplePlugin::serialize\n");
124 |     char *d = static_cast<char *>(buffer);
125 |     const char *a = d;
126 | 
127 |     writeToBuffer(d, mScaleFactor_h);
128 |     writeToBuffer(d, mScaleFactor_w);
129 |     writeToBuffer(d, mInputVolume);
130 |     writeToBuffer(d, mInputShape.c());
131 |     writeToBuffer(d, mInputShape.h());
132 |     writeToBuffer(d, mInputShape.w());
133 | 
134 |     assert(d == a + getSerializationSize());
135 | }
136 | 
137 | void UpsamplePlugin::configureWithFormat(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, DataType type, PluginFormat format, int)
138 | {
139 |     assert(nbOutputs == 1);
140 |     assert(type == DataType::kFLOAT);
141 |     assert(format == PluginFormat::kNCHW);
142 |     assert(inputs[0].nbDims == 3);
143 | 
144 |     size_t volume = int(inputs[0].d[1]*mScaleFactor_h) * int(inputs[0].d[2]*mScaleFactor_w);
145 |     mInputVolume = volume;
146 |     mInputShape.c() = inputs[0].d[0];
147 |     mInputShape.h() = inputs[0].d[1];
148 |     mInputShape.w() = inputs[0].d[2];
149 | }
150 | 
151 | bool UpsamplePlugin::supportsFormat(DataType type, PluginFormat format) const
152 | {
153 |     if (type == DataType::kFLOAT && format == PluginFormat::kNCHW)
154 |         return true;
155 |     else
156 |         return false;
157 | }
158 | 
159 | void UpsamplePlugin::terminate() {}
160 | 
161 | void UpsamplePlugin::destroy() {
162 |     // This gets called when the network containing plugin is destroyed
163 |     delete this;
164 | }
165 | 
166 | IPluginV2* UpsamplePlugin::clone() const
167 | {
168 |     return new UpsamplePlugin(mLayerName, mScaleFactor_h, mScaleFactor_w);
169 | }
170 | 
171 | void UpsamplePlugin::setPluginNamespace(const char* libNamespace) 
172 | {
173 |     mNamespace = libNamespace;
174 | }
175 | 
176 | const char* UpsamplePlugin::getPluginNamespace() const
177 | {
178 |     return mNamespace.c_str();
179 | }
180 | 
181 | UpsamplePluginCreator::UpsamplePluginCreator()
182 | {
183 |     mPluginAttributes.emplace_back(PluginField("scaleFactor", nullptr, PluginFieldType::kFLOAT32, 1));
184 | 
185 |     mFC.nbFields = mPluginAttributes.size();
186 |     mFC.fields = mPluginAttributes.data();
187 | }
188 | const char* UpsamplePluginCreator::getPluginName() const
189 | {
190 |     return UPSAMPLE_PLUGIN_NAME;
191 | }
192 | 
193 | const char* UpsamplePluginCreator::getPluginVersion() const
194 | {
195 |     return UPSAMPLE_PLUGIN_VERSION;
196 | }
197 | 
198 | const PluginFieldCollection* UpsamplePluginCreator::getFieldNames()
199 | {
200 |     return &mFC;
201 | }
202 | 
203 | IPluginV2* UpsamplePluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
204 | {
205 |     float scaleFactor_h = 0.f;
206 |     float scaleFactor_w = 0.f;
207 |     const PluginField* fields = fc->fields;
208 | 
209 |     assert(fc->nbFields == 1);
210 |     for (int i = 0; i < fc->nbFields; i++){
211 |     
212 |         if (strcmp(fields[i].name, "scaleFactor") == 0) {
213 |             assert(fields[i].type == PluginFieldType::kFLOAT32);
214 |             scaleFactor_h = *(static_cast<const float*>(fields[i].data));
215 |             scaleFactor_w = *(static_cast<const float*>(fields[i].data)+1);
216 |             //std::cout<<scaleFactor_h<< " , "<<scaleFactor_w<<std::endl;
217 |         } 
218 |     }
219 |     return new UpsamplePlugin(name, scaleFactor_h, scaleFactor_w);
220 | }
221 | 
222 | IPluginV2* UpsamplePluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
223 | {
224 |     return new UpsamplePlugin(name, serialData, serialLength);
225 | }
226 | 
227 | void UpsamplePluginCreator::setPluginNamespace(const char* libNamespace) 
228 | {
229 |     mNamespace = libNamespace;
230 | }
231 | 
232 | const char* UpsamplePluginCreator::getPluginNamespace() const
233 | {
234 |     return mNamespace.c_str();
235 | }
236 | 


--------------------------------------------------------------------------------
/src/plugin/UpsamplePlugin.h:
--------------------------------------------------------------------------------
 1 | #ifndef UPSAMPLE_PLUGIN_H
 2 | #define UPSAMPLE_PLUGIN_H
 3 | 
 4 | #include "NvInferPlugin.h"
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | 
 9 | using namespace nvinfer1;
10 | 
11 | class UpsamplePlugin : public IPluginV2
12 | {
13 | public:
14 |     UpsamplePlugin(const std::string name, float scale_h,float scale_w);
15 | 
16 |     UpsamplePlugin(const std::string name, const void* data, size_t length);
17 | 
18 |     // It doesn't make sense to make UpsamplePlugin without arguments, so we delete default constructor.
19 |     UpsamplePlugin() = delete;
20 | 
21 |     int getNbOutputs() const override;
22 | 
23 |     Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
24 | 
25 |     int initialize() override;
26 | 
27 |     void terminate() override;
28 | 
29 |     size_t getWorkspaceSize(int) const override { return 0; };
30 | 
31 |     int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override;
32 | 
33 |     size_t getSerializationSize() const override;
34 | 
35 |     void serialize(void* buffer) const override;
36 | 
37 |     void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override;
38 | 
39 |     bool supportsFormat(DataType type, PluginFormat format) const override;
40 | 
41 |     const char* getPluginType() const override;
42 | 
43 |     const char* getPluginVersion() const override;
44 | 
45 |     void destroy() override;
46 | 
47 |     nvinfer1::IPluginV2* clone() const override;
48 | 
49 |     void setPluginNamespace(const char* pluginNamespace) override;
50 | 
51 |     const char* getPluginNamespace() const override;
52 | 
53 | private:
54 |     const std::string mLayerName;
55 |     bool mAlignCorners;
56 |     float mScaleFactor_h;
57 |     float mScaleFactor_w;
58 |     size_t mInputVolume;
59 |     DimsCHW mInputShape;
60 |     std::string mNamespace;
61 | };
62 | 
63 | class UpsamplePluginCreator : public IPluginCreator
64 | {
65 | public:
66 |     UpsamplePluginCreator();
67 | 
68 |     const char* getPluginName() const override;
69 | 
70 |     const char* getPluginVersion() const override;
71 | 
72 |     const PluginFieldCollection* getFieldNames() override;
73 | 
74 |     IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override;
75 | 
76 |     IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
77 |     
78 |     void setPluginNamespace(const char* pluginNamespace) override;
79 | 
80 |     const char* getPluginNamespace() const override;
81 | 
82 | private:
83 |     static PluginFieldCollection mFC;
84 |     static std::vector<PluginField> mPluginAttributes;
85 |     std::string mNamespace;
86 | };
87 | 
88 | #endif
89 | 


--------------------------------------------------------------------------------
/src/plugin/UpsmapleKernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef UPSAMPLE_KERNEL_H
 2 | #define UPSAMPLE_KERNEL_H
 3 | 
 4 | #include <iostream>
 5 | #include "NvInfer.h"
 6 | 
 7 | int UpsampleInference(
 8 |     cudaStream_t stream,
 9 |     int n,
10 |     int input_b,
11 |     int input_c,
12 |     int input_h,
13 |     int input_w,
14 |     float scale_h,
15 |     float scale_w,
16 |     const void* inputs,
17 |     void* outputs);
18 | 
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/plugin/hardswish.cu:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include "hardswish.h"
  3 | #include "utils.h"
  4 | 
  5 | namespace nvinfer1
  6 | {
  7 |     HardSwishPlugin::HardSwishPlugin()
  8 |     {
  9 |     }
 10 | 
 11 |     HardSwishPlugin::~HardSwishPlugin()
 12 |     {
 13 |     }
 14 | 
 15 |     // create the plugin at runtime from a byte stream
 16 |     HardSwishPlugin::HardSwishPlugin(const void* data, size_t length)
 17 |     {
 18 |         const char *d = reinterpret_cast<const char *>(data), *a = d;
 19 |         read(d, mInputSize);
 20 |         assert(d == a + length);
 21 |     }
 22 | 
 23 |     void HardSwishPlugin::serialize(void* buffer) const
 24 |     {
 25 |         char* d = static_cast<char*>(buffer), *a = d;
 26 |         write(d, mInputSize);
 27 |         assert(d == a + getSerializationSize());
 28 |     }
 29 | 
 30 |     size_t HardSwishPlugin::getSerializationSize() const
 31 |     {
 32 |         return sizeof(mInputSize);
 33 |     }
 34 | 
 35 |     int HardSwishPlugin::initialize()
 36 |     {
 37 |         return 0;
 38 |     }
 39 | 
 40 |     Dims HardSwishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
 41 |     {
 42 |         assert(nbInputDims == 1);
 43 |         assert(index == 0);
 44 |         return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
 45 |     }
 46 | 
 47 |     // Set plugin namespace
 48 |     void HardSwishPlugin::setPluginNamespace(const char* pluginNamespace)
 49 |     {
 50 |         mPluginNamespace = pluginNamespace;
 51 |     }
 52 | 
 53 |     const char* HardSwishPlugin::getPluginNamespace() const
 54 |     {
 55 |         return mPluginNamespace;
 56 |     }
 57 | 
 58 |     // Return the DataType of the plugin output at the requested index
 59 |     DataType HardSwishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
 60 |     {
 61 |         return DataType::kFLOAT;
 62 |     }
 63 | 
 64 |     // Return true if output tensor is broadcast across a batch.
 65 |     bool HardSwishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
 66 |     {
 67 |         return false;
 68 |     }
 69 | 
 70 |     // Return true if plugin can use input that is broadcast across batch without replication.
 71 |     bool HardSwishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
 72 |     {
 73 |         return false;
 74 |     }
 75 | 
 76 |     void HardSwishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
 77 |     {
 78 |         mInputSize = in[0].dims.d[0] * in[0].dims.d[1] * in[0].dims.d[2];
 79 |     }
 80 | 
 81 |     // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
 82 |     void HardSwishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
 83 |     {
 84 |     }
 85 | 
 86 |     // Detach the plugin object from its execution context.
 87 |     void HardSwishPlugin::detachFromContext() {}
 88 | 
 89 |     const char* HardSwishPlugin::getPluginType() const
 90 |     {
 91 |         return "HardSwishLayer_TRT";
 92 |     }
 93 | 
 94 |     const char* HardSwishPlugin::getPluginVersion() const
 95 |     {
 96 |         return "1";
 97 |     }
 98 | 
 99 |     void HardSwishPlugin::destroy()
100 |     {
101 |         delete this;
102 |     }
103 | 
104 |     // Clone the plugin
105 |     IPluginV2IOExt* HardSwishPlugin::clone() const
106 |     {
107 |         HardSwishPlugin *p = new HardSwishPlugin();
108 |         p->setPluginNamespace(mPluginNamespace);
109 |         p->setInputSize(mInputSize);
110 |         return p;
111 |     }
112 | 
113 | 
114 |     __global__ void HardSwishKer(const float *in, float *out, int size) {
115 |         int idx = threadIdx.x + blockIdx.x * blockDim.x;
116 |         if (idx >= size)
117 |             return;
118 | 
119 |         if (in[idx] >= 3.0f)
120 |             out[idx] = in[idx];
121 |         else if (in[idx] < -3.0f)
122 |             out[idx] = 0.0f;
123 |         else
124 |             out[idx] = in[idx] * (in[idx] + 3.0f) / 6.0f;
125 |     }
126 | 
127 |     void HardSwishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
128 | 
129 |         int numElem = batchSize * mInputSize;
130 |         HardSwishKer<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount>>>
131 |             (inputs[0], output, numElem);
132 |     }
133 | 
134 | 
135 |     int HardSwishPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
136 |     {
137 |         forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
138 |         return 0;
139 |     }
140 | 
141 |     PluginFieldCollection HardSwishPluginCreator::mFC{};
142 |     std::vector<PluginField> HardSwishPluginCreator::mPluginAttributes;
143 | 
144 |     HardSwishPluginCreator::HardSwishPluginCreator()
145 |     {
146 |         mPluginAttributes.clear();
147 |         mFC.nbFields = mPluginAttributes.size();
148 |         mFC.fields = mPluginAttributes.data();
149 |     }
150 | 
151 |     const char* HardSwishPluginCreator::getPluginName() const
152 |     {
153 |             return "HardSwishLayer_TRT";
154 |     }
155 | 
156 |     const char* HardSwishPluginCreator::getPluginVersion() const
157 |     {
158 |             return "1";
159 |     }
160 | 
161 |     const PluginFieldCollection* HardSwishPluginCreator::getFieldNames()
162 |     {
163 |             return &mFC;
164 |     }
165 | 
166 |     IPluginV2IOExt* HardSwishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
167 |     {
168 |         HardSwishPlugin* obj = new HardSwishPlugin();
169 |         obj->setPluginNamespace(mNamespace.c_str());
170 |         return obj;
171 |     }
172 | 
173 |     IPluginV2IOExt* HardSwishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
174 |     {
175 |         // This object will be deleted when the network is destroyed, which will
176 |         // call MishPlugin::destroy()
177 |         HardSwishPlugin* obj = new HardSwishPlugin(serialData, serialLength);
178 |         obj->setPluginNamespace(mNamespace.c_str());
179 |         return obj;
180 |     }
181 | 
182 | }
183 | 


--------------------------------------------------------------------------------
/src/plugin/hardswish.h:
--------------------------------------------------------------------------------
 1 | #ifndef _HARD_SWISH_LAYER_H
 2 | #define _HARD_SWISH_LAYER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include "NvInfer.h"
 7 | 
 8 | namespace nvinfer1
 9 | {
10 |     class HardSwishPlugin: public IPluginV2IOExt
11 |     {
12 |         public:
13 |             explicit HardSwishPlugin();
14 |             HardSwishPlugin(const void* data, size_t length);
15 |             ~HardSwishPlugin();
16 | 
17 |             int getNbOutputs() const override
18 |             {
19 |                 return 1;
20 |             }
21 | 
22 |             Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
23 |             int initialize() override;
24 |             virtual void terminate() override {};
25 |             virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
26 |             virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
27 |             virtual size_t getSerializationSize() const override;
28 |             virtual void serialize(void* buffer) const override;
29 | 
30 |             bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
31 |                 return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
32 |             }
33 | 
34 |             const char* getPluginType() const override;
35 |             const char* getPluginVersion() const override;
36 |             void destroy() override;
37 |             IPluginV2IOExt* clone() const override;
38 |             void setPluginNamespace(const char* pluginNamespace) override;
39 |             const char* getPluginNamespace() const override;
40 |             DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
41 |             bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
42 |             bool canBroadcastInputAcrossBatch(int inputIndex) const override;
43 |             void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
44 |             void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
45 |             void detachFromContext() override;
46 | 
47 |             void setInputSize(int s) {
48 |                 mInputSize = s;
49 |             }
50 | 
51 |         private:
52 |             void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1);
53 |             int mThreadCount = 256;
54 |             int mInputSize;
55 |             const char* mPluginNamespace;
56 |     };
57 | 
58 |     class HardSwishPluginCreator : public IPluginCreator
59 |     {
60 |         public:
61 |             HardSwishPluginCreator();
62 |             ~HardSwishPluginCreator() override = default;
63 |             const char* getPluginName() const override;
64 |             const char* getPluginVersion() const override;
65 |             const PluginFieldCollection* getFieldNames() override;
66 |             IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
67 |             IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
68 | 
69 |             void setPluginNamespace(const char* libNamespace) override
70 |             {
71 |                 mNamespace = libNamespace;
72 |             }
73 | 
74 |             const char* getPluginNamespace() const override
75 |             {
76 |                 return mNamespace.c_str();
77 |             }
78 | 
79 |         private:
80 |             std::string mNamespace;
81 |             static PluginFieldCollection mFC;
82 |             static std::vector<PluginField> mPluginAttributes;
83 |     };
84 |     REGISTER_TENSORRT_PLUGIN(HardSwishPluginCreator);
85 | };
86 | 
87 | #endif
88 | 


--------------------------------------------------------------------------------
/src/plugin/yololayer.cu:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include "yololayer.h"
  3 | #include "utils.h"
  4 | 
  5 | 
  6 | using namespace Yolo;
  7 | 
  8 | namespace nvinfer1
  9 | {
 10 |     YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel)
 11 |     {
 12 |         mClassCount = classCount;
 13 |         mYoloV5NetWidth = netWidth;
 14 |         mYoloV5NetHeight = netHeight;
 15 |         mMaxOutObject = maxOut;
 16 |         mYoloKernel = vYoloKernel;
 17 |         mKernelCount = vYoloKernel.size();
 18 | 
 19 |         CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
 20 |         size_t AnchorLen = sizeof(float)* CHECK_COUNT * 2;
 21 |         for (int ii = 0; ii < mKernelCount; ii++)
 22 |         {
 23 |             CUDA_CHECK(cudaMalloc(&mAnchor[ii], AnchorLen));
 24 |             const auto& yolo = mYoloKernel[ii];
 25 |             CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
 26 |         }
 27 |     }
 28 |     YoloLayerPlugin::~YoloLayerPlugin()
 29 |     {
 30 |         for (int ii = 0; ii < mKernelCount; ii++)
 31 |         {
 32 |             CUDA_CHECK(cudaFree(mAnchor[ii]));
 33 |         }
 34 |         CUDA_CHECK(cudaFreeHost(mAnchor));
 35 |     }
 36 | 
 37 |     // create the plugin at runtime from a byte stream
 38 |     YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
 39 |     {
 40 |         //using namespace Tn;
 41 |         const char *d = reinterpret_cast<const char *>(data), *a = d;
 42 |         read(d, mClassCount);
 43 |         read(d, mThreadCount);
 44 |         read(d, mKernelCount);
 45 |         read(d, mYoloV5NetWidth);
 46 |         read(d, mYoloV5NetHeight);
 47 |         read(d, mMaxOutObject);
 48 |         mYoloKernel.resize(mKernelCount);
 49 |         auto kernelSize = mKernelCount * sizeof(YoloKernel);
 50 |         memcpy(mYoloKernel.data(), d, kernelSize);
 51 |         d += kernelSize;
 52 |         CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
 53 |         size_t AnchorLen = sizeof(float)* CHECK_COUNT * 2;
 54 |         for (int ii = 0; ii < mKernelCount; ii++)
 55 |         {
 56 |             CUDA_CHECK(cudaMalloc(&mAnchor[ii], AnchorLen));
 57 |             const auto& yolo = mYoloKernel[ii];
 58 |             CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
 59 |         }
 60 |         assert(d == a + length);
 61 |     }
 62 | 
 63 |     void YoloLayerPlugin::serialize(void* buffer) const
 64 |     {
 65 |         //using namespace Tn;
 66 |         char* d = static_cast<char*>(buffer), *a = d;
 67 |         write(d, mClassCount);
 68 |         write(d, mThreadCount);
 69 |         write(d, mKernelCount);
 70 |         write(d, mYoloV5NetWidth);
 71 |         write(d, mYoloV5NetHeight);
 72 |         write(d, mMaxOutObject);
 73 |         auto kernelSize = mKernelCount * sizeof(YoloKernel);
 74 |         memcpy(d, mYoloKernel.data(), kernelSize);
 75 |         d += kernelSize;
 76 | 
 77 |         assert(d == a + getSerializationSize());
 78 |     }
 79 | 
 80 |     size_t YoloLayerPlugin::getSerializationSize() const
 81 |     {
 82 |         return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size() + sizeof(mYoloV5NetWidth) + sizeof(mYoloV5NetHeight) + sizeof(mMaxOutObject);
 83 |     }
 84 | 
 85 |     int YoloLayerPlugin::initialize()
 86 |     {
 87 |         return 0;
 88 |     }
 89 | 
 90 |     Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
 91 |     {
 92 |         //output the result to channel
 93 |         int totalsize = mMaxOutObject * sizeof(Detection) / sizeof(float);
 94 | 
 95 |         return Dims3(totalsize + 1, 1, 1);
 96 |     }
 97 | 
 98 |     // Set plugin namespace
 99 |     void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
100 |     {
101 |         mPluginNamespace = pluginNamespace;
102 |     }
103 | 
104 |     const char* YoloLayerPlugin::getPluginNamespace() const
105 |     {
106 |         return mPluginNamespace;
107 |     }
108 | 
109 |     // Return the DataType of the plugin output at the requested index
110 |     DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
111 |     {
112 |         return DataType::kFLOAT;
113 |     }
114 | 
115 |     // Return true if output tensor is broadcast across a batch.
116 |     bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
117 |     {
118 |         return false;
119 |     }
120 | 
121 |     // Return true if plugin can use input that is broadcast across batch without replication.
122 |     bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
123 |     {
124 |         return false;
125 |     }
126 | 
127 |     void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
128 |     {
129 |     }
130 | 
131 |     // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
132 |     void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
133 |     {
134 |     }
135 | 
136 |     // Detach the plugin object from its execution context.
137 |     void YoloLayerPlugin::detachFromContext() {}
138 | 
139 |     const char* YoloLayerPlugin::getPluginType() const
140 |     {
141 |         return "YoloLayer_TRT";
142 |     }
143 | 
144 |     const char* YoloLayerPlugin::getPluginVersion() const
145 |     {
146 |         return "1";
147 |     }
148 | 
149 |     void YoloLayerPlugin::destroy()
150 |     {
151 |         delete this;
152 |     }
153 | 
154 |     // Clone the plugin
155 |     IPluginV2IOExt* YoloLayerPlugin::clone() const
156 |     {
157 |         YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, mYoloKernel);
158 |         p->setPluginNamespace(mPluginNamespace);
159 |         return p;
160 |     }
161 | 
162 |     __device__ float Logist(float data) { return 1.0f / (1.0f + expf(-data)); };
163 | 
164 |     __global__ void CalDetection(const float *input, float *output, int noElements,
165 |         const int netwidth, const int netheight, int maxoutobject, int yoloWidth, int yoloHeight, const float anchors[CHECK_COUNT * 2], int classes, int outputElem)
166 |     {
167 | 
168 |         int idx = threadIdx.x + blockDim.x * blockIdx.x;
169 |         if (idx >= noElements) return;
170 | 
171 |         int total_grid = yoloWidth * yoloHeight;
172 |         int bnIdx = idx / total_grid;
173 |         idx = idx - total_grid * bnIdx;
174 |         int info_len_i = 5 + classes;
175 |         const float* curInput = input + bnIdx * (info_len_i * total_grid * CHECK_COUNT);
176 | 
177 |         for (int k = 0; k < 3; ++k) {
178 |             float box_prob = Logist(curInput[idx + k * info_len_i * total_grid + 4 * total_grid]);
179 |             if (box_prob < IGNORE_THRESH) continue;
180 |             int class_id = 0;
181 |             float max_cls_prob = 0.0;
182 |             for (int i = 5; i < info_len_i; ++i) {
183 |                 float p = Logist(curInput[idx + k * info_len_i * total_grid + i * total_grid]);
184 |                 if (p > max_cls_prob) {
185 |                     max_cls_prob = p;
186 |                     class_id = i - 5;
187 |                 }
188 |             }
189 |             float *res_count = output + bnIdx * outputElem;
190 |             int count = (int)atomicAdd(res_count, 1);
191 |             if (count >= maxoutobject) return;
192 |             char* data = (char *)res_count + sizeof(float) + count * sizeof(Detection);
193 |             Detection* det = (Detection*)(data);
194 | 
195 |             int row = idx / yoloWidth;
196 |             int col = idx % yoloWidth;
197 | 
198 |             //Location
199 |             // pytorch:
200 |             //  y = x[i].sigmoid()
201 |             //  y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
202 |             //  y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh 
203 |             //  X: (sigmoid(tx) + cx)/FeaturemapW *  netwidth 
204 |             det->bbox[0] = (col - 0.5f + 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 0 * total_grid])) * netwidth / yoloWidth;
205 |             det->bbox[1] = (row - 0.5f + 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 1 * total_grid])) * netheight / yoloHeight;
206 | 
207 |             // W: (Pw * e^tw) / FeaturemapW * netwidth  
208 |             // v5: https://github.com/ultralytics/yolov5/issues/471
209 |             det->bbox[2] = 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 2 * total_grid]);
210 |             det->bbox[2] = det->bbox[2] * det->bbox[2] * anchors[2 * k];
211 |             det->bbox[3] = 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 3 * total_grid]);
212 |             det->bbox[3] = det->bbox[3] * det->bbox[3] * anchors[2 * k + 1];
213 |             det->conf = box_prob * max_cls_prob;
214 |             det->class_id = class_id;
215 |         }
216 |     }
217 | 
218 |     // Resets each batch item's leading output float and launches CalDetection once per YOLO kernel, all on `stream`.
219 |     void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize)
220 |     {
221 |         // Floats per batch item: one leading float plus mMaxOutObject Detection records.
222 |         int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);
223 |         for (int idx = 0; idx < batchSize; ++idx) {
224 |             // Async memset keeps the reset ordered with the kernels below on the caller's stream.
225 |             CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream));
226 |         }
227 |         for (unsigned int i = 0; i < mYoloKernel.size(); ++i) {
228 |             const auto& yolo = mYoloKernel[i];
229 |             const int numElem = yolo.width * yolo.height * batchSize;
230 |             if (numElem <= 0) continue;  // nothing to process; also avoids a zero-sized block and a division by zero
231 |             // Clamp the block size in a local so the mThreadCount member is not shrunk for later launches.
232 |             const int threads = numElem < mThreadCount ? numElem : mThreadCount;
233 |             CalDetection << < (numElem + threads - 1) / threads, threads, 0, stream >> >
234 |                 (inputs[i], output, numElem, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, yolo.width, yolo.height, (float *)mAnchor[i], mClassCount, outputElem);
235 |         }
236 |     }
237 | 
238 | 
239 |     int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
240 |     {   // Runs the layer for `batchSize` items: inputs are reinterpreted as float pointers, results go to outputs[0].
241 |         forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);  // `workspace` is not used
242 |         return 0;  // always returns 0
243 |     }
244 | 
245 |     PluginFieldCollection YoloPluginCreator::mFC{};  // definition of the static field collection (value-initialized)
246 |     std::vector<PluginField> YoloPluginCreator::mPluginAttributes;  // definition of the static attribute list that mFC points into
247 | 
248 |     YoloPluginCreator::YoloPluginCreator()  // Publishes an empty attribute list through the static mFC.
249 |     {
250 |         mPluginAttributes.clear();
251 |         // With the list emptied above, mFC describes zero fields (nbFields == 0).
252 |         mFC.nbFields = mPluginAttributes.size();
253 |         mFC.fields = mPluginAttributes.data();
254 |     }
255 | 
256 |     const char* YoloPluginCreator::getPluginName() const  // Returns the fixed plugin name string.
257 |     {
258 |         return "YoloLayer_TRT";
259 |     }
260 | 
261 |     const char* YoloPluginCreator::getPluginVersion() const  // Returns the fixed plugin version string.
262 |     {
263 |         return "1";
264 |     }
265 | 
266 |     const PluginFieldCollection* YoloPluginCreator::getFieldNames()  // Returns a pointer to the static mFC (not a copy).
267 |     {
268 |         return &mFC;
269 |     }
270 | 
271 |     // Builds a YoloLayerPlugin from the "netdata" and "yolodataN" fields of `fc`; values not supplied keep the defaults below.
272 |     // Fields that are null, too short, or map to a slot outside yolo_kernels are skipped instead of being accessed out of bounds.
273 |     IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
274 |     {
275 |         int class_count = 80, input_w = 416, input_h = 416, max_output_object_count = 1000;
276 |         std::vector<Yolo::YoloKernel> yolo_kernels(3);
277 |         const int maxAnchors = static_cast<int>(sizeof(yolo_kernels[0].anchors) / sizeof(yolo_kernels[0].anchors[0]));
278 |         const PluginField* fields = (fc != nullptr) ? fc->fields : nullptr;
279 |         for (int i = 0; fields != nullptr && i < fc->nbFields; i++) {
280 |             // The payload is read as packed ints even though the field type is asserted to be kFLOAT32.
281 |             const int *tmp = (const int*)(fields[i].data);
282 |             if (tmp != nullptr && strcmp(fields[i].name, "netdata") == 0 && fields[i].length >= 4) {
283 |                 assert(fields[i].type == PluginFieldType::kFLOAT32);
284 |                 class_count = tmp[0];
285 |                 input_w = tmp[1];
286 |                 input_h = tmp[2];
287 |                 max_output_object_count = tmp[3];
288 |             } else if (tmp != nullptr && strstr(fields[i].name, "yolodata") != NULL && fields[i].length >= 2) {
289 |                 assert(fields[i].type == PluginFieldType::kFLOAT32);
290 |                 // For names of the form "yolodataN", name[8] is N: '1' selects slot 2 and '3' selects slot 0.
291 |                 const int slot = 2 - (fields[i].name[8] - '1');
292 |                 if (slot < 0 || slot >= static_cast<int>(yolo_kernels.size())) continue;
293 |                 YoloKernel kernel = {};  // zero-filled so anchors that are not supplied are not left uninitialized
294 |                 kernel.width = tmp[0];
295 |                 kernel.height = tmp[1];
296 |                 // Copy at most maxAnchors values so an oversized field cannot overrun kernel.anchors.
297 |                 for (int j = 0; j < fields[i].length - 2 && j < maxAnchors; j++) kernel.anchors[j] = tmp[j + 2];
298 |                 yolo_kernels[slot] = kernel;
299 |             }
300 |         }
301 |         YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, yolo_kernels);
302 |         obj->setPluginNamespace(mNamespace.c_str());
303 |         return obj;
304 |     }
304 | 
305 |     IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
306 |     {
307 |         // Rebuild the plugin from its serialized bytes and tag it with this creator's namespace.
308 |         // The raw pointer is returned to the caller; it is expected to be released via YoloLayerPlugin::destroy() when the network is destroyed.
309 |         YoloLayerPlugin* const restored = new YoloLayerPlugin(serialData, serialLength);
310 |         restored->setPluginNamespace(mNamespace.c_str());
311 |         return restored;
312 |     }
313 | }
314 | 


--------------------------------------------------------------------------------
/src/plugin/yololayer.h:
--------------------------------------------------------------------------------
  1 | #ifndef _YOLO_LAYER_H
  2 | #define _YOLO_LAYER_H
  3 | /*
  4 |  *
  5 | */
  6 | #include <vector>
  7 | #include <string>
  8 | #include <iostream>
  9 | #include <NvInfer.h>
 10 | using namespace std;
 11 | namespace Yolo  // constants and plain data layouts used by the YOLO layer plugin
 12 | {
 13 |     static constexpr int CHECK_COUNT = 3;  // anchors per YoloKernel (anchors[] holds CHECK_COUNT * 2 floats)
 14 |     static constexpr float IGNORE_THRESH = 0.1f;  // NOTE(review): presumably the minimum confidence kept by the kernel; confirm at its use site
 15 |     struct YoloKernel  // one detection scale: feature-map size plus its anchors
 16 |     {
 17 |         int width;
 18 |         int height;
 19 |         float anchors[CHECK_COUNT * 2];  // (w, h) pairs: anchors[2*k] scales the box width, anchors[2*k+1] the box height
 20 |     };
 21 | 
 22 |     static constexpr int LOCATIONS = 4;  // number of floats in Detection::bbox
 23 |     struct alignas(float) Detection {  // one detection record, stored as six consecutive floats
 24 |         //center_x center_y w h
 25 |         float bbox[LOCATIONS];
 26 |         float conf;  // bbox_conf * cls_conf
 27 |         float class_id;  // class index stored as a float
 28 |     };
 29 | }
 30 | 
 31 | namespace nvinfer1
 32 | {
 33 |     class YoloLayerPlugin : public IPluginV2IOExt  // YOLO output-layer plugin built on the IPluginV2IOExt interface
 34 |     {
 35 |     public:
 36 |         YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel);
 37 |         YoloLayerPlugin(const void* data, size_t length);  // constructs from a serialized byte buffer
 38 |         ~YoloLayerPlugin();
 39 | 
 40 |         int getNbOutputs() const override  // the plugin has exactly one output
 41 |         {
 42 |             return 1;
 43 |         }
 44 | 
 45 |         Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
 46 | 
 47 |         int initialize() override;
 48 | 
 49 |         virtual void terminate() override {};  // no-op
 50 | 
 51 |         virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }  // no scratch workspace is requested
 52 | 
 53 |         virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
 54 | 
 55 |         virtual size_t getSerializationSize() const override;
 56 | 
 57 |         virtual void serialize(void* buffer) const override;
 58 | 
 59 |         bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {  // accepts only linear-format FP32 tensors
 60 |             return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
 61 |         }
 62 | 
 63 |         const char* getPluginType() const override;
 64 | 
 65 |         const char* getPluginVersion() const override;
 66 | 
 67 |         void destroy() override;
 68 | 
 69 |         IPluginV2IOExt* clone() const override;
 70 | 
 71 |         void setPluginNamespace(const char* pluginNamespace) override;
 72 | 
 73 |         const char* getPluginNamespace() const override;
 74 | 
 75 |         DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
 76 | 
 77 |         bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
 78 | 
 79 |         bool canBroadcastInputAcrossBatch(int inputIndex) const override;
 80 | 
 81 |         void attachToContext(
 82 |             cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
 83 | 
 84 |         void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
 85 | 
 86 |         void detachFromContext() override;
 87 | 
 88 |     private:
 89 |         void forwardGpu(const float *const * inputs, float * output, cudaStream_t stream, int batchSize = 1);  // launches the detection kernel for every entry of mYoloKernel
 90 |         int mThreadCount = 256;  // CUDA block size used by forwardGpu
 91 |         const char* mPluginNamespace;  // NOTE(review): no default initializer; confirm it is always set before getPluginNamespace()/clone() read it
 92 |         int mKernelCount;  // NOTE(review): this and the members below have no default initializers; they rely on the constructors
 93 |         int mClassCount;
 94 |         int mYoloV5NetWidth;
 95 |         int mYoloV5NetHeight;
 96 |         int mMaxOutObject;  // maximum number of Detection records per batch item in the output buffer
 97 |         std::vector<Yolo::YoloKernel> mYoloKernel;  // one entry per detection scale
 98 |         void** mAnchor;  // per-kernel anchor buffers, passed to the kernel as float* (presumably device memory; confirm in the constructor)
 99 |     };
100 | 
101 |     class YoloPluginCreator : public IPluginCreator  // creator that builds or deserializes YoloLayerPlugin instances
102 |     {
103 |     public:
104 |         YoloPluginCreator();
105 | 
106 |         ~YoloPluginCreator() override = default;
107 | 
108 |         const char* getPluginName() const override;
109 | 
110 |         const char* getPluginVersion() const override;
111 | 
112 |         const PluginFieldCollection* getFieldNames() override;
113 | 
114 |         IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;  // builds a plugin from the fields in fc
115 | 
116 |         IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;  // rebuilds a plugin from serialized bytes
117 | 
118 |         void setPluginNamespace(const char* libNamespace) override  // stores a copy of the namespace string
119 |         {
120 |             mNamespace = libNamespace;
121 |         }
122 | 
123 |         const char* getPluginNamespace() const override  // pointer into mNamespace; valid until mNamespace is next modified
124 |         {
125 |             return mNamespace.c_str();
126 |         }
127 | 
128 |     private:
129 |         std::string mNamespace;
130 |         static PluginFieldCollection mFC;  // shared by all creator instances
131 |         static std::vector<PluginField> mPluginAttributes;  // storage that mFC.fields points into
132 |     };
133 |     REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);  // NOTE(review): placed in a header, so the registration expands in every translation unit that includes it; confirm this is intended
134 | };
135 | 
136 | #endif 
137 | 


--------------------------------------------------------------------------------
/src/trt.h:
--------------------------------------------------------------------------------
  1 | #ifndef TRT_H
  2 | #define TRT_H
  3 | 
  4 | 
  5 | #include <fstream>
  6 | #include "json.h"
  7 | #include <assert.h>
  8 | #include "NvOnnxParser.h"
  9 | #include "NvOnnxConfig.h"
 10 | #include "calibrator.h"
 11 | #include "UpsamplePlugin.h"
 12 | #include "yololayer.h"
 13 | #include "hardswish.h"
 14 | 
 15 | 
 16 | struct Param{  // settings bundle held by class trt; NOTE(review): the per-field notes below are inferred from names, confirm against trt.cpp
 17 |     int input_c;  // presumably input channels
 18 |     int input_h;  // presumably input height
 19 |     int input_w;  // presumably input width
 20 |     bool createENG;
 21 |     string ENGPath;
 22 |     bool fp16;
 23 |     bool int8;
 24 |     bool Div_255;
 25 |     string cali_txt;
 26 |     string cali_table;
 27 |     vector<float> mean;
 28 |     vector<float> std;
 29 |     string weightPath;
 30 |     string wtsAllPath;
 31 |     string onnxPath;
 32 |     string inputBlobName;
 33 |     //string outputBlobName;
 34 |     int maxBatchsize;
 35 |     int outputSize = 0;  // the only member with a default; all other scalars are uninitialized until assigned
 36 |     bool doInfer;
 37 |     int BatchSize;
 38 |     string imgDir;
 39 |     string imgType;
 40 |     Json::Value layers;
 41 |     vector<string> outputNames;
 42 | };
 43 | 
 44 | // ILogger implementation that writes every non-info message to std::cerr behind a severity tag.
 45 | class Logger:public nvinfer1::ILogger
 46 | {
 47 | public:
 48 |     // `severity` selects the tag printed before `msg`; info-level messages produce no output.
 49 |     void log(nvinfer1::ILogger::Severity severity, const char *msg) override
 50 |     {
 51 |         const char *tag = nullptr;
 52 |         switch (severity)
 53 |         {
 54 |         case Severity::kINFO:
 55 |             // suppress info-level messages
 56 |             return;
 57 |         case Severity::kINTERNAL_ERROR:
 58 |             tag = "INTERNAL_ERROR: ";
 59 |             break;
 60 |         case Severity::kERROR:
 61 |             tag = "ERROR: ";
 62 |             break;
 63 |         case Severity::kWARNING:
 64 |             tag = "WARNING: ";
 65 |             break;
 66 |         default:
 67 |             // any other severity value gets the generic tag
 68 |             tag = "UNKNOWN: ";
 69 |             break;
 70 |         }
 71 |         std::cerr << tag << msg << std::endl;
 72 |     }
 73 | };
 74 | 
 75 | class trt  // network builder / inference wrapper; method roles noted below are inferred from names unless stated otherwise
 76 | {
 77 | public:
 78 |     trt(const string &jsonPath);  // takes the path of a JSON file (presumably the network description; confirm in trt.cpp)
 79 |     ~trt();
 80 |     void debug_print(nvinfer1::ITensor *input_tensor,const string &head);
 81 |     void printWeight(Weights wts, int wtsSize);
 82 |     vector<float> loadWeights(const string &filePath);
 83 |     void createENG();
 84 |     void onnx2trt();
 85 |     void addLayer(Json::Value layer);
 86 |     void inference_init(int batchsize);
 87 |     void doInference(const float *input, int batchsize, float *output);  // float-output variant
 88 |     void doInference_int(const float *input, int batchsize, int *output);  // int-output variant
 89 |     ITensor* trt_convNet(ITensor* input,string weightsPath,string biasFile,
 90 |                          int output_c,DimsHW kernel,DimsHW stride = DimsHW{1,1},
 91 |                          DimsHW padding =DimsHW{0,0},DimsHW dilations =DimsHW{1,1},
 92 |                          int groups = 1,bool pre = false,bool post = false);
 93 |     ITensor* trt_deconvNet(ITensor* input,string weightsPath,string biasFile,
 94 |                          int output_c,DimsHW kernel,DimsHW stride = DimsHW{1,1},
 95 |                          DimsHW padding =DimsHW{0,0},DimsHW dilations =DimsHW{1,1},
 96 |                          int groups = 1,bool pre = false,bool post = false);
 97 |     ITensor* trt_bnNet(ITensor* input, string weightsPath,float eps=1.0e-5);
 98 |     ITensor* trt_activeNet(ITensor* input,string acti_type,float alpha=0.0,float beta=0.0);
 99 |     ITensor* trt_poolNet(ITensor* input,string pooltype,DimsHW kernel,DimsHW stride,DimsHW padding);
100 |     ITensor* trt_eltNet(ITensor* input1,ITensor* input2,string elt_Type);
101 |     ITensor* conv_bn_active(ITensor* input,string weightsPath, int output_c,int k,int s=1,int p=0,int dilations=1,int groups=1,
102 |                             float eps=1e-5, string active_type="relu", float alpha=0.0, float beta=0.0);
103 |     ITensor* trt_resnetCBA(Json::Value temp,ITensor* input);
104 |     void trt_preInput(Json::Value layer);  // this and the following void methods each take one JSON layer description by value
105 |     void trt_conv(Json::Value layer);
106 |     void trt_deconv(Json::Value layer);
107 |     void trt_padding(Json::Value layer);
108 |     void trt_bn(Json::Value layer);
109 |     void trt_active(Json::Value layer);
110 |     void trt_pool(Json::Value layer);
111 |     void trt_Pool(Json::Value layer);  // NOTE(review): differs from trt_pool only by letter case; confirm both are intended
112 |     void trt_elt(Json::Value layer);
113 |     void trt_fc(Json::Value layer);
114 |     void trt_concat(Json::Value layer);
115 |     void trt_slice(Json::Value layer);
116 |     void trt_softmax(Json::Value layer);
117 |     void trt_shuffle(Json::Value layer);
118 |     void trt_matmul(Json::Value layer);
119 |     void trt_topk(Json::Value layer);
120 |     void trt_reduce(Json::Value layer);
121 |     void trt_constant(Json::Value layer);
122 |     void trt_pReLU(Json::Value layer);
123 |     void trt_convBnActive(Json::Value layer);
124 |     void trt_resnetLayer(Json::Value layer);
125 |     void trt_resnet3(Json::Value layer);
126 |     void trt_focus(Json::Value layer);
127 |     void trt_UpSample(Json::Value layer);
128 |     void trt_UpSample_plugin(Json::Value layer);
129 |     void trt_groupNorm(Json::Value layer);
130 |     void trt_unary(Json::Value layer);
131 |     ITensor* convBlock(ITensor* input,int outch,int k,int s,string lname,string acti_type,
132 |                        float eps=1e-3,float alpha = 0.0);
133 |     ITensor* bottleneck(ITensor* input, string lname,string acti_type,int c1, int c2, bool shortcut, float e,
134 |                         float eps=1e-3,float alpha = 0.0);
135 |     ITensor* SPP();
136 |     void yolo_C3(Json::Value layer);
137 |     void trt_yolo(Json::Value layer);
138 |     void yolo_spp(Json::Value layer);
139 |     int out_channel(int &out_c, int &n);
140 |     void resnet(Json::Value layer);
141 |     void hrnet_res(Json::Value layer);
142 |     void hrnet_up(Json::Value layer);
143 | 
144 | 
145 | 
146 |     // `param` is a public data member; the remaining state below is private.
147 |     Param param;
148 | private:
149 |     Logger m_logger;
150 | 
151 |     map<string,ITensor*> Layers;  // tensors looked up by string key
152 |     INetworkDefinition *m_Network; //network
153 |     vector<void *> m_bindings;
154 |     void* temp;  // NOTE(review): purpose is not evident from this header; confirm in trt.cpp
155 |     vector<int> outputs;
156 |     nvinfer1::IExecutionContext *m_context;
157 |     cudaStream_t m_cudaStream;
158 |     nvinfer1::ICudaEngine *m_engine;
159 |     int inputIndex;
160 |     int outputIndex;
161 | };
162 | 
163 | #endif // TRT_H
164 | 


--------------------------------------------------------------------------------
/src/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | 
 3 | 
 4 | void mblobFromImages(cv::InputArrayOfArrays images_, cv::OutputArray blob_,
 5 |     cv::Size size, const cv::Scalar& mean_, const cv::Scalar& std_, bool swapRB, bool crop)
 6 | {
 7 |     // Builds a 4-D CV_32F blob (image, channel, row, col) in blob_: each image is resized/cropped to `size`, 8-bit data is converted to float, then mean_ is subtracted and the result divided by std_.
 8 |     std::vector<cv::Mat> images;
 9 |     images_.getMatVector(images);
10 |     CV_Assert(!images.empty());
11 |     for (int i = 0; i < images.size(); i++)
12 |     {
13 |         cv::Size imgSize = images[i].size();
14 |         if (size == cv::Size())  // an empty target size adopts this image's size, which then also applies to later images
15 |             size = imgSize;
16 |         if (size != imgSize)
17 |         {
18 |             if (crop)
19 |             {
20 |                 float resizeFactor = std::max(size.width / (float)imgSize.width,  // uniform scale large enough that the image covers `size` in both dimensions
21 |                     size.height / (float)imgSize.height);
22 |                 resize(images[i], images[i], cv::Size(), resizeFactor, resizeFactor, cv::INTER_LINEAR);
23 |                 cv::Rect crop(cv::Point(0.5 * (images[i].cols - size.width),  // centered window of `size`; this local shadows the `crop` parameter
24 |                     0.5 * (images[i].rows - size.height)),
25 |                     size);
26 |                 images[i] = images[i](crop);
27 |             }
28 |             else
29 |                 resize(images[i], images[i], size, 0, 0, cv::INTER_LINEAR);  // plain resize; aspect ratio is not preserved
30 |         }
31 |         if (images[i].depth() == CV_8U)  // only 8-bit input is converted; other depths are caught by the CV_32F asserts further down
32 |             images[i].convertTo(images[i], CV_32F);
33 |         cv::Scalar mean = mean_;
34 |         cv::Scalar std_num = std_;
35 |         if (swapRB)  // the image channels are not swapped yet, so swap the constants so they line up with the current channel order
36 |         {
37 |             std::swap(mean[0], mean[2]);
38 |             std::swap(std_num[0], std_num[2]);
39 |         }
40 |         // NOTE(review): both operations below work in place; if the image was neither resized nor converted this may modify the caller's pixel data. Confirm.
41 |         images[i] -= mean;
42 |         cv::divide(images[i], std_num, images[i]);
43 |     }
44 |     // Second pass: copy the preprocessed images into the blob, one plane per channel.
45 |     size_t i, nimages = images.size();
46 |     cv::Mat image0 = images[0];
47 |     int nch = image0.channels();
48 |     CV_Assert(image0.dims == 2);
49 |     cv::Mat image;
50 |     if (nch == 3 || nch == 4)
51 |     {
52 |         int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
53 |         blob_.create(4, sz, CV_32F);
54 |         cv::Mat blob = blob_.getMat();
55 |         cv::Mat ch[4];
56 | 
57 |         for (i = 0; i < nimages; i++)
58 |         {
59 |             image = images[i];
60 |             CV_Assert(image.depth() == CV_32F);
61 |             nch = image.channels();
62 |             CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
63 |             CV_Assert(image.size() == image0.size());  // every image must match the first one's size
64 | 
65 |             for (int j = 0; j < nch; j++)
66 |                 ch[j] = cv::Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j));  // header over plane j of image i inside the blob (no copy)
67 |             if (swapRB)
68 |                 std::swap(ch[0], ch[2]);  // makes split() write source channel 0 into plane 2 and channel 2 into plane 0
69 |             split(image, ch);
70 |         }
71 |     }
72 |     else
73 |     {
74 |         CV_Assert(nch == 1);
75 |         int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
76 |         blob_.create(4, sz, CV_32F);
77 |         cv::Mat blob = blob_.getMat();
78 | 
79 |         for (i = 0; i < nimages; i++)
80 |         {
81 |             cv::Mat image = images[i];
82 |             CV_Assert(image.depth() == CV_32F);
83 |             nch = image.channels();
84 |             CV_Assert(image.dims == 2 && (nch == 1));
85 |             CV_Assert(image.size() == image0.size());
86 | 
87 |             image.copyTo(cv::Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0)));  // copy the single channel straight into its blob plane
88 |         }
89 |     }
90 | }
91 | cv::Mat BlobFromImages(cv::InputArrayOfArrays images, cv::Size size,
92 |     const cv::Scalar& mean, const cv::Scalar& std_num, bool swapRB, bool crop)
93 | {
94 |     // Convenience wrapper: forwards every argument to mblobFromImages and returns the resulting blob by value.
95 |     cv::Mat blob;
96 |     mblobFromImages(images, blob, size, mean, std_num, swapRB, crop);
97 |     return blob;
98 | }
99 | 


--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_H
 2 | #define UTILS_H
 3 | #include <cstring>
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <vector>
 7 | #include "opencv2/core/core.hpp"
 8 | #include "opencv2/dnn/dnn.hpp"
 9 | #include "opencv2/imgcodecs/imgcodecs.hpp"
10 | #include "opencv2/imgproc/imgproc.hpp"
11 | using namespace std;
12 | 
13 | #ifndef CUDA_CHECK
14 | // Evaluates a CUDA runtime call; on failure prints the error code with file and line to std::cerr, then hits assert(0) (which does nothing when NDEBUG is defined).
15 | #define CUDA_CHECK(callstr)                                                                    \
16 |     {                                                                                          \
17 |         cudaError_t error_code = callstr;                                                      \
18 |         if (error_code != cudaSuccess) {                                                       \
19 |             std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
20 |             assert(0);                                                                         \
21 |         }                                                                                      \
22 |     }
23 | // NOTE(review): the macro expands to a bare { } block (not do { } while (0)), and it relies on the includer providing <cassert> and the CUDA runtime header.
24 | #endif
25 | 
26 | template<typename T>  // Serializes `val` into `buffer` byte-wise, then advances `buffer` by sizeof(T); T should be trivially copyable.
27 | void write(char*& buffer, const T& val)
28 | {
29 |     std::memcpy(buffer, &val, sizeof(T));  // byte copy instead of a store through T*: `buffer` need not be aligned for T
30 |     buffer += sizeof(T);
31 | }
32 | 
33 | template<typename T>  // Deserializes a T from `buffer` byte-wise into `val`, then advances `buffer` by sizeof(T); T should be trivially copyable.
34 | void read(const char*& buffer, T& val)
35 | {
36 |     std::memcpy(&val, buffer, sizeof(T));  // byte copy instead of a load through const T*: `buffer` need not be aligned for T
37 |     buffer += sizeof(T);
38 | }
39 | 
40 | 
41 | void mblobFromImages(cv::InputArrayOfArrays images_, cv::OutputArray blob_,  // fills blob_ with a 4-D CV_32F blob built from images_
42 |     cv::Size size, const cv::Scalar& mean_, const cv::Scalar& std_, bool swapRB, bool crop);
43 | cv::Mat BlobFromImages(cv::InputArrayOfArrays images, cv::Size size,  // same processing as mblobFromImages, returning the blob by value
44 |     const cv::Scalar& mean, const cv::Scalar& std_num, bool swapRB, bool crop);
45 | #endif // UTILS_H
46 | 


--------------------------------------------------------------------------------