├── .gitignore ├── Makefile ├── README.md └── src ├── application ├── app_fastbev.cpp ├── fastbev.cpp ├── fastbev.hpp └── fastbev_decode.cu ├── main.cpp └── tensorRT ├── builder ├── trt_builder.cpp └── trt_builder.hpp ├── common ├── cuda_tools.cpp ├── cuda_tools.hpp ├── ilogger.cpp ├── ilogger.hpp ├── infer_controller.hpp ├── json.cpp ├── json.hpp ├── monopoly_allocator.hpp ├── preprocess_kernel.cu ├── preprocess_kernel.cuh ├── trt_tensor.cpp └── trt_tensor.hpp ├── import_lib.cpp ├── infer ├── trt_infer.cpp └── trt_infer.hpp ├── onnx ├── onnx-ml.pb.cpp ├── onnx-ml.pb.h ├── onnx-operators-ml.pb.cpp ├── onnx-operators-ml.pb.h ├── onnx_pb.h ├── onnxifi.h └── readme.md ├── onnx_parser ├── ImporterContext.hpp ├── LoopHelpers.cpp ├── LoopHelpers.hpp ├── ModelImporter.cpp ├── ModelImporter.hpp ├── NvOnnxParser.cpp ├── NvOnnxParser.h ├── OnnxAttrs.cpp ├── OnnxAttrs.hpp ├── RNNHelpers.cpp ├── RNNHelpers.hpp ├── ShapeTensor.cpp ├── ShapeTensor.hpp ├── ShapedWeights.cpp ├── ShapedWeights.hpp ├── Status.hpp ├── TensorOrWeights.hpp ├── builtin_op_importers.cpp ├── builtin_op_importers.hpp ├── onnx2trt.hpp ├── onnx2trt_common.hpp ├── onnx2trt_runtime.hpp ├── onnx2trt_utils.cpp ├── onnx2trt_utils.hpp ├── onnxErrorRecorder.cpp ├── onnxErrorRecorder.hpp ├── onnx_utils.hpp ├── readme.md ├── toposort.hpp ├── trt_utils.hpp └── utils.hpp └── onnxplugin ├── onnxplugin.cpp ├── onnxplugin.hpp ├── plugin_binary_io.cpp ├── plugin_binary_io.hpp └── plugins └── Project2Dto3D.cu /.gitignore: -------------------------------------------------------------------------------- 1 | workspace/ 2 | build/ 3 | .vscode/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | cpp_srcs := $(shell find src -name "*.cpp") 4 | cpp_objs := $(cpp_srcs:.cpp=.o) 5 | cpp_objs := $(cpp_objs:src/%=build/%) 6 | cpp_mk := $(cpp_objs:.o=.mk) 7 | 8 | cu_srcs := $(shell find src -name "*.cu") 9 | cu_objs := $(cu_srcs:.cu=.cuo) 10 | cu_objs := $(cu_objs:src/%=build/%) 11 | cu_mk := $(cu_objs:.cuo=.cumk) 12 | 13 | # 配置你的库路径 14 | lean_protobuf := /home/wicri/fy/code/cpp_cuda_centernet/protobuf/aarch64/protobuf 15 | lean_tensor_rt := /usr/lib/aarch64-linux-gnu/ 16 | lean_cudnn := /usr/local/cuda 17 | lean_opencv := /usr/include/ 18 | lean_cuda := /usr/local/cuda 19 | use_python := false 20 | python_root := /datav/software/anaconda3 21 | python_name := python3.9 22 | 23 | include_paths := src \ 24 | src/application \ 25 | src/camerasdk \ 26 | src/tensorRT \ 27 | src/tensorRT/common \ 28 | $(lean_protobuf)/include \ 29 | $(lean_opencv)/opencv4/ \ 30 | /usr/include/aarch64-linux-gnu/ \ 31 | $(lean_cuda)/include \ 32 | $(lean_cudnn)/include 33 | 34 | library_paths := $(lean_protobuf)/lib \ 35 | $(lean_opencv)/opencv4/ \ 36 | /usr/include/aarch64-linux-gnu/ \ 37 | $(lean_cuda)/lib64 \ 38 | $(lean_cudnn)/lib 39 | 40 | link_librarys := opencv_core opencv_imgproc opencv_videoio opencv_imgcodecs \ 41 | nvinfer nvinfer_plugin \ 42 | cuda cublas cudart cudnn \ 43 | stdc++ protobuf dl opencv_highgui opencv_ml 44 | 45 | # HAS_PYTHON表示是否编译python支持 46 | support_define := 47 | 48 | ifeq ($(use_python), true) 49 | include_paths += $(lean_python)/include/python3.8 50 | library_paths += $(lean_python)/lib 51 | link_librarys += python3.8 52 | support_define += -DHAS_PYTHON 53 | endif 54 | 55 | paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item)) 56 | include_paths := $(foreach item,$(include_paths),-I$(item)) 57 | library_paths := 
$(foreach item,$(library_paths),-L$(item)) 58 | link_librarys := $(foreach item,$(link_librarys),-l$(item)) 59 | 60 | # 如果是其他显卡,请修改-gencode=arch=compute_75,code=sm_75为对应显卡的能力 61 | cpp_compile_flags := -std=c++11 -fPIC -g -fopenmp -w -O0 $(support_define) 62 | cu_compile_flags := -std=c++11 -m64 -Xcompiler -fPIC -g -w -gencode=arch=compute_72,code=sm_72 -O0 $(support_define) 63 | link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN' 64 | 65 | cpp_compile_flags += $(include_paths) 66 | cu_compile_flags += $(include_paths) 67 | link_flags += $(library_paths) $(link_librarys) $(paths) 68 | 69 | cpp_compile_flags += -I/opt/ros/melodic/include 70 | 71 | ifneq ($(MAKECMDGOALS), clean) 72 | -include $(cpp_mk) $(cu_mk) 73 | endif 74 | 75 | pro : workspace/pro 76 | 77 | workspace/pro : $(cpp_objs) $(cu_objs) 78 | @echo Link $@ 79 | @mkdir -p $(dir $@) 80 | @g++ $^ -o $@ $(link_flags) 81 | 82 | 83 | build/%.o : src/%.cpp 84 | @echo Compile CXX $< 85 | @mkdir -p $(dir $@) 86 | @g++ -c $< -o $@ $(cpp_compile_flags) 87 | 88 | build/%.cuo : src/%.cu 89 | @echo Compile CUDA $< 90 | @mkdir -p $(dir $@) 91 | @nvcc -c $< -o $@ $(cu_compile_flags) 92 | 93 | build/%.mk : src/%.cpp 94 | @echo Compile depends CXX $< 95 | @mkdir -p $(dir $@) 96 | @g++ -M $< -MF $@ -MT $(@:.mk=.o) $(cpp_compile_flags) 97 | 98 | build/%.cumk : src/%.cu 99 | @echo Compile depends CUDA $< 100 | @mkdir -p $(dir $@) 101 | @nvcc -M $< -MF $@ -MT $(@:.cumk=.o) $(cu_compile_flags) 102 | 103 | 104 | bev : workspace/pro 105 | @cd workspace && ./pro fastbev 106 | 107 | clean : 108 | @rm -rf build workspace/pro python/trtpy/libtrtpyc.so python/build python/dist python/trtpy.egg-info python/trtpy/__pycache__ 109 | @rm -rf build 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Detail readme.md is coming soon 3 | 4 | # how to use 5 | 6 | pytorch to onnx 7 | 8 | https://github.com/thfylsty/FastBEV 9 | 10 | make bev -j8 11 | 12 | 13 | # Reference: 14 | 15 | TensorRT 16 | 17 | https://github.com/shouxieai/tensorRT_Pro -------------------------------------------------------------------------------- /src/application/app_fastbev.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "fastbev.hpp" 6 | #include 7 | 8 | using namespace std; 9 | 10 | 11 | static const char* label_map[] = { 12 | "Pedestrian", "Car","MotorcyleRider", "Crane", "Motorcycle", "Bus", "BicycleRider", "Van", "Excavator", "TricycleRider","Truck" 13 | }; 14 | static void append_to_file(const string& file, const string& data){ 15 | FILE* f = fopen(file.c_str(), "a+"); 16 | if(f == nullptr){ 17 | INFOE("Open %s failed.", file.c_str()); 18 | return; 19 | } 20 | 21 | fprintf(f, "%s\n", data.c_str()); 22 | fclose(f); 23 | } 24 | 25 | std::vector rotate_box(float x1, float y1, float x2, float y2, float r) { 26 | // Step 1: Translate coordinates to top-left corner 27 | float cx = (x1 + x2) / 2.0f; 28 | float cy = (y1 + y2) / 2.0f; 29 | x1 -= cx; 30 | y1 -= cy; 31 | x2 -= cx; 32 | y2 -= cy; 33 | 34 | // Step 2: Convert angle to radians 35 | r = r * M_PI / 180.0f; 36 | 37 | // Step 3: Compute rotation matrix 38 | float cos_r = cos(r); 39 | float sin_r = sin(r); 40 | 41 | // Step 4: Rotate box vertices 42 | float x1_new = cos_r * x1 - sin_r * y1; 43 | float y1_new = sin_r * x1 + cos_r * y1; 44 | float x2_new = cos_r * x2 - sin_r * y2; 45 | float y2_new = sin_r * x2 + 
cos_r * y2; 46 | 47 | // Step 5: Translate coordinates back to original position 48 | x1_new += cx; 49 | y1_new += cy; 50 | x2_new += cx; 51 | y2_new += cy; 52 | 53 | // Step 6: Pack rotated box coordinates into vector and return 54 | std::vector rotated_box = {x1_new, y1_new, x2_new, y2_new}; 55 | return rotated_box; 56 | } 57 | 58 | void forward(shared_ptr &engine, Fastbev::Image &images,cv::Mat &bevimg){ 59 | 60 | auto boxes = engine->commit(images).get(); 61 | // printf("boxes [%d]\n",boxes.size()); 62 | int bevsize_w = 1000; 63 | int bevsize_h = 600; 64 | cv::Mat img(bevsize_h, bevsize_w, CV_8UC3, cv::Scalar(255,255,255)); 65 | for(auto& obj : boxes){ 66 | // printf("class[%s] confidence[%f] label[%d] x[%f] y[%f] z[%f] dx[%f] dy[%f] dz[%f] rot[%f] \n", 67 | // label_map[obj.label],obj.confidence,obj.label,obj.x,obj.y,obj.z,obj.dx,obj.dy,obj.dz,obj.rot); 68 | 69 | // Calculate the four corner points of the rotated rectangle 70 | uint8_t b, g, r; 71 | tie(b, g, r) = iLogger::random_color(obj.label + 1); 72 | 73 | int x = bevsize_w - (obj.y + 50)*10; 74 | int y = bevsize_h - obj.x * 10; 75 | int w = obj.dx * 10; 76 | int h = obj.dy * 10; 77 | int rot = int(90 - obj.rot/3.1415926*180 + 360)%180; 78 | // printf("%d %d %d %d %d \n",x,y,x+w,y+h,r); 79 | cv::RotatedRect box(cv::Point(x, y), cv::Size(w, h), rot); 80 | cv::Point2f vertex[4]; 81 | box.points(vertex); 82 | for (int i = 0; i < 4; i++) 83 | cv::line(img, vertex[i], vertex[(i + 1) % 4], cv::Scalar(b, g, r),10,cv::LINE_AA); 84 | 85 | auto caption = iLogger::format("[%s %.2f]", label_map[obj.label],obj.confidence); 86 | cv::putText(img, caption, (cv::Point(x, y-w-10)), 0, 0.5, cv::Scalar(b, g, r), 1, 16); 87 | 88 | cv::circle(img, cv::Point(bevsize_w/2,bevsize_h), 20, cv::Scalar(0, 0, 0), cv::FILLED); 89 | 90 | } 91 | bevimg = img; 92 | cv::imwrite("result.png", img); 93 | } 94 | 95 | 96 | 97 | static void inference_and_performance(int deviceid, const string& engine_file, TRT::Mode mode, const string& model_name,const string& imgpath){ 98 | 99 | auto engine = Fastbev::create_infer( 100 | engine_file, // engine file 101 | deviceid, // gpu id 102 | 0.9f, // confidence threshold 103 | 0.45f, // nms threshold 104 | Fastbev::NMSMethod::FastGPU, // NMS method, fast GPU / CPU 105 | 1024, // max objects 106 | false // preprocess use multi stream 107 | ); 108 | if(engine == nullptr){ 109 | INFOE("Engine is nullptr"); 110 | return; 111 | } 112 | 113 | 114 | std::vector images_mat; 115 | auto imagef = cv::imread("./images/2022-05-12-11-24-22_000005_front.png"); 116 | auto imagel = cv::imread("./images/2022-05-12-11-24-22_000005_left.png"); 117 | auto imager = cv::imread("./images/2022-05-12-11-24-22_000005_right.png"); 118 | 119 | images_mat.emplace_back(imagef); 120 | images_mat.emplace_back(imagel); 121 | images_mat.emplace_back(imager); 122 | 123 | Fastbev::Image images(images_mat); 124 | auto boxes = engine->commit(images).get(); 125 | for(auto& obj : boxes) 126 | printf("class[%s] confidence[%f] label[%d] x[%f] y[%f] z[%f] dx[%f] dy[%f] dz[%f] rot[%f] \n", 127 | label_map[obj.label],obj.confidence,obj.label,obj.x,obj.y,obj.z,obj.dx,obj.dy,obj.dz,obj.rot); 128 | 129 | printf("input images height %d width %d nums %d \n",images.get_height(),images.get_width(),images.get_nums()); 130 | 131 | 132 | // warmup 133 | for(int i = 0; i < 10; ++i) 134 | auto boxes = engine->commit(images).get(); 135 | 136 | 137 | int test_nums = 20; 138 | auto begin_timer = iLogger::timestamp_now_float(); 139 | for(int i = 0; i < test_nums; ++i) 140 | auto boxes = 
engine->commit(images).get(); 141 | float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / test_nums; 142 | INFO(" ==== average: %.2f ms / iter, FPS: %.2f === ", inference_average_time, 1000 / inference_average_time); 143 | 144 | std::vector allimages; 145 | 146 | for(int idx =0 ;idx < 160 ;idx ++){ 147 | std::ostringstream oss; 148 | oss << std::setw(6) << std::setfill('0') << idx; 149 | 150 | std::string imgfile = "./roadsideimages/2022-05-09-08-47-43_" +oss.str(); //+ "_front.png" 151 | imagef = cv::imread(imgfile+"_front.png"); 152 | imagel = cv::imread(imgfile+"_left.png"); 153 | imager = cv::imread(imgfile+"_right.png"); 154 | std::cout << imgfile+"_front.png" <commits(images); 192 | // for (auto & image :images){ 193 | // boxes_array.emplace_back(engine->commit(image)); 194 | // } 195 | 196 | // // wait all result 197 | // boxes_array.back().get(); 198 | 199 | // float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / ntest / images.get_nums(); 200 | // auto mode_name = TRT::mode_string(mode); 201 | // INFO("%s average: %.2f ms / image, FPS: %.2f", engine_file.c_str(), inference_average_time, 1000 / inference_average_time); 202 | // append_to_file("perf.result.log", iLogger::format("%s,%s,%f", model_name.c_str(), mode_name, inference_average_time)); 203 | 204 | 205 | // string root = iLogger::format("%s_%s_%s_result", imgpath.c_str(),model_name.c_str(), mode_name); 206 | // iLogger::rmtree(root); 207 | // iLogger::mkdir(root); 208 | 209 | // for(int i = 0; i < boxes_array.size(); ++i){ 210 | 211 | // // auto& image = images[i].cvmat; 212 | // auto boxes = boxes_array[i].get(); 213 | 214 | // for(auto& obj : boxes){ 215 | // printf("[%f %f %f][%d %f]\n",obj.x,obj.y,obj.z,obj.label,obj.confidence); 216 | // // uint8_t b, g, r; 217 | // // tie(b, g, r) = iLogger::random_color(obj.class_label); 218 | // // cv::rectangle(image, cv::Point(obj.left, obj.top), cv::Point(obj.right, obj.bottom), cv::Scalar(b, g, r), 2); 219 | 220 | // // auto name = cocolabels[obj.class_label]; 221 | // // auto caption = iLogger::format("%s [%.2f %.2f]", name,obj.confidence,obj.depth); 222 | // // int width = cv::getTextSize(caption, 0, 0.5, 1, nullptr).width + 1; 223 | // // cv::rectangle(image, cv::Point(obj.left-3, obj.top-20), cv::Point(obj.left + width, obj.top), cv::Scalar(b, g, r), -1); 224 | // // cv::putText(image, caption, cv::Point(obj.left, obj.top-5), 0, 0.5, cv::Scalar::all(0), 1, 16); 225 | // } 226 | 227 | // // string file_name = iLogger::file_name(files[i], false); 228 | // // string save_path = iLogger::format("%s/%s.jpg", root.c_str(), file_name.c_str()); 229 | // // INFO("Save to %s, %d object, average time %.2f ms", save_path.c_str(), boxes.size(), inference_average_time); 230 | // // cv::imwrite(save_path, image); 231 | // // cv::imshow("f", image);cv::waitKey(0); 232 | // } 233 | engine.reset(); 234 | } 235 | 236 | static void test(TRT::Mode mode, const string& model,const string& imgpath,const int batch_size){ 237 | 238 | int deviceid = 0; 239 | auto mode_name = TRT::mode_string(mode); 240 | TRT::set_device(deviceid); 241 | 242 | 243 | const char* name = model.c_str(); 244 | INFO("===================== test %s %s ==================================", mode_name, name); 245 | 246 | 247 | string onnx_file = iLogger::format("onnxs/%s.onnx", name); 248 | string model_file = iLogger::format("engines/%s.%s.bs%d.engine", name, mode_name, batch_size); 249 | std::cout << model_file<< std::endl; 250 | int test_batch_size = batch_size; 251 | 252 | 
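    // Compile the TensorRT engine from the ONNX file only when no serialized
    // engine exists yet; later runs reuse the cached .engine file directly.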
if(not iLogger::exists(model_file)){ 253 | TRT::compile( 254 | mode, // FP32、FP16、INT8 255 | test_batch_size, // max batch size 256 | onnx_file, // source 257 | model_file, // save to 258 | {}, 259 | nullptr, 260 | "inference" 261 | ); 262 | } 263 | 264 | inference_and_performance(deviceid, model_file, mode, name,imgpath); 265 | 266 | } 267 | 268 | 269 | int app_fastbev(){ 270 | 271 | // test(TRT::Mode::FP32, "roadside_train_half_res_aug_20230405-2208—epoch_50_20230414-1135","images",1); 272 | test(TRT::Mode::FP16, "roadside_train_half_res_aug_20230405-2208—epoch_50_20230414-1135","images",1); 273 | 274 | return 0; 275 | } -------------------------------------------------------------------------------- /src/application/fastbev.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FASTBEV_HPP 3 | #define FASTBEV_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | namespace Fastbev{ 15 | 16 | using namespace std; 17 | 18 | enum class ImageType : int{ 19 | CVMat = 0, 20 | GPUYUV = 1 // nv12 21 | }; 22 | 23 | struct Image{ 24 | ImageType type = ImageType::CVMat; 25 | cv::Mat cvmat; 26 | std::vector cvmats; 27 | 28 | // GPU YUV image 29 | TRT::CUStream stream = nullptr; 30 | // uint8_t* device_data = nullptr; 31 | int width = 0, height = 0; 32 | int device_id = 0; 33 | 34 | Image() = default; 35 | Image(const std::vector& cvmats):cvmats(cvmats), type(ImageType::CVMat), 36 | width(cvmats[0].cols),height(cvmats[0].cols){} 37 | 38 | int get_nums() const{return cvmats.size(); } 39 | int get_width() const{return cvmats[0].cols;} 40 | int get_height() const{return cvmats[0].rows;} 41 | cv::Size get_size() const{return cv::Size(get_width(), get_height()*3);} 42 | bool empty() const{return cvmats.size()==0 || cvmats[0].empty();} 43 | 44 | 45 | }; 46 | 47 | 48 | struct Box{ 49 | float x,y,z,dx,dy,dz,rot,confidence; 50 | int label; 51 | 52 | Box() = default; 53 | 54 | Box(float x, float y, float z, float dx, float dy, float dz, float rot, float confidence, int label) 55 | :x(x), y(y), z(z), dx(dx),dy(dy),dz(dz),rot(rot), confidence(confidence), label(label){} 56 | }; 57 | 58 | typedef std::vector BoxArray; 59 | 60 | 61 | enum class NMSMethod : int{ 62 | CPU = 0, // General, for estimate mAP 63 | FastGPU = 1 // Fast NMS with a small loss of accuracy in corner cases 64 | }; 65 | 66 | // void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, int ibatch); 67 | 68 | class Infer{ 69 | public: 70 | virtual shared_future commit(const Image& image) = 0; 71 | }; 72 | 73 | shared_ptr create_infer( 74 | const string& engine_file, int gpuid, 75 | float confidence_threshold=0.25f, float nms_threshold=0.5f, 76 | NMSMethod nms_method = NMSMethod::FastGPU, int max_objects = 1024, 77 | bool use_multi_preprocess_stream = false 78 | ); 79 | 80 | 81 | }; // namespace Fastbev 82 | 83 | #endif -------------------------------------------------------------------------------- /src/application/fastbev_decode.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | 5 | namespace Fastbev{ 6 | 7 | const int NUM_BOX_ELEMENT = 10; // fastbev: x y z dx dy dz r conf class keepflag 8 | // static __device__ void affine_project(float* matrix, float x, float y, float* ox, float* oy){ 9 | // *ox = matrix[0] * x + matrix[1] * y + matrix[2]; 10 | // *oy = matrix[3] * x + matrix[4] * y + matrix[5]; 11 | // } 12 | 13 | static __global__ void decode_kernel(float* 
predict, int num_bboxes, int num_classes, float confidence_threshold, float* parray, int max_objects){ 14 | 15 | int position = blockDim.x * blockIdx.x + threadIdx.x; 16 | if (position >= num_bboxes) return; 17 | 18 | float* pitem = predict + 20 * position; // fastbev: 7+2+11 x y z dx dy dz r d classnum 19 | // float objectness = pitem[4]; 20 | // if(objectness < confidence_threshold) 21 | // return; 22 | float* class_confidence = pitem; 23 | float *conf_tmp = class_confidence; 24 | float confidence = *class_confidence++; 25 | int label = 0; 26 | for(int i = 1; i < num_classes; ++i, ++class_confidence){ 27 | if(*class_confidence > confidence){ 28 | confidence = *class_confidence; 29 | label = i; 30 | } 31 | } 32 | // printf("confidence_threshold[%f] %f",confidence_threshold,*pitem); 33 | 34 | // confidence *= objectness; 35 | if(confidence < confidence_threshold) 36 | return; 37 | 38 | int index = atomicAdd(parray, 1); 39 | if(index >= max_objects) 40 | return; 41 | 42 | pitem = pitem+num_classes; 43 | float x = *pitem++; 44 | float y = *pitem++; 45 | float z = *pitem++; 46 | float dx = *pitem++; 47 | float dy = *pitem++; 48 | float dz = *pitem++; 49 | float r = *pitem++; 50 | float dir1 = *pitem++; 51 | float dir2 = *pitem++; 52 | 53 | // printf("label[%d] | confidence[%f] | x[%f] | y[%f] | z[%f] | dx[%f] | dy[%f] | dz[%f] | r[%f] | dir[%f] \n",label,confidence,x,y,z,dx,dy,dz,r, dir); 54 | 55 | float* pout_item = parray + 1 + index * NUM_BOX_ELEMENT; 56 | *pout_item++ = x; 57 | *pout_item++ = y; 58 | *pout_item++ = z; 59 | *pout_item++ = dx; 60 | *pout_item++ = dy; 61 | *pout_item++ = dz; 62 | *pout_item++ = r + (dir1 > dir2 ? 0 : 1)*3.1415926; 63 | *pout_item++ = confidence; 64 | *pout_item++ = label; 65 | *pout_item++ = 1.0; // 1 = keep, 0 = ignore 66 | 67 | } 68 | 69 | static __device__ float box_iou( 70 | float aleft, float atop, float aright, float abottom, 71 | float bleft, float btop, float bright, float bbottom 72 | ){ 73 | 74 | float cleft = max(aleft, bleft); 75 | float ctop = max(atop, btop); 76 | float cright = min(aright, bright); 77 | float cbottom = min(abottom, bbottom); 78 | 79 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); 80 | if(c_area == 0.0f) 81 | return 0.0f; 82 | 83 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); 84 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); 85 | return c_area / (a_area + b_area - c_area); 86 | } 87 | 88 | static __global__ void nms_kernel(float* bboxes, int max_objects, float threshold){ 89 | 90 | int position = (blockDim.x * blockIdx.x + threadIdx.x); 91 | int count = min((int)*bboxes, max_objects); 92 | if (position >= count) 93 | return; 94 | 95 | // x y z dx dy dz r conf class keepflag 96 | float* pcurrent = bboxes + 1 + position * NUM_BOX_ELEMENT; 97 | for(int i = 0; i < count; ++i){ 98 | float* pitem = bboxes + 1 + i * NUM_BOX_ELEMENT; 99 | if(i == position || pcurrent[8] != pitem[8]) continue; 100 | 101 | if(pitem[7] >= pcurrent[7]){ 102 | if(pitem[7] == pcurrent[7] && i < position) 103 | continue; 104 | 105 | float p_x=pcurrent[0]; float p_y=pcurrent[1];float p_dx=pcurrent[3];float p_dy=pcurrent[4]; 106 | float n_x=pitem[0]; float n_y=pitem[1];float n_dx=pitem[3];float n_dy=pitem[4]; 107 | 108 | float iou = box_iou( 109 | p_x,p_y,p_x + p_dx,p_y + p_dy, 110 | n_x,n_y,n_x +n_dx,n_y +n_dy 111 | ); 112 | if(iou > threshold){ 113 | pcurrent[9] = 0; // 1=keep, 0=ignore 114 | return; 115 | } 116 | } 117 | } 118 | } 119 | 120 | void decode_kernel_invoker(float* predict, 
int num_bboxes, int num_classes, float confidence_threshold, float* parray, int max_objects, cudaStream_t stream){ 121 | 122 | auto grid = CUDATools::grid_dims(num_bboxes); 123 | auto block = CUDATools::block_dims(num_bboxes); 124 | 125 | 126 | checkCudaKernel(decode_kernel<<>>(predict, num_bboxes, num_classes, confidence_threshold, parray, max_objects)); 127 | } 128 | 129 | void nms_kernel_invoker(float* parray, float nms_threshold, int max_objects, cudaStream_t stream){ 130 | 131 | auto grid = CUDATools::grid_dims(max_objects); 132 | auto block = CUDATools::block_dims(max_objects); 133 | checkCudaKernel(nms_kernel<<>>(parray, max_objects, nms_threshold)); 134 | } 135 | }; -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int app_fastbev(); 7 | 8 | int main(){ 9 | app_fastbev(); 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /src/tensorRT/builder/trt_builder.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_BUILDER_HPP 4 | #define TRT_BUILDER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace TRT { 12 | 13 | typedef std::function& files, std::shared_ptr& tensor)> Int8Process; 14 | typedef std::function(const std::string& name, const std::vector& shape)> LayerHookFuncReshape; 15 | 16 | enum class ModelSourceType : int{ 17 | OnnX, 18 | OnnXData 19 | }; 20 | 21 | class ModelSource { 22 | public: 23 | ModelSource() = default; 24 | ModelSource(const std::string& onnxmodel); 25 | ModelSource(const char* onnxmodel); 26 | ModelSourceType type() const; 27 | std::string onnxmodel() const; 28 | std::string descript() const; 29 | const void* onnx_data() const; 30 | size_t onnx_data_size() const; 31 | 32 | static ModelSource onnx(const std::string& file){ 33 | ModelSource output; 34 | output.onnxmodel_ = file; 35 | output.type_ = ModelSourceType::OnnX; 36 | return output; 37 | } 38 | 39 | static ModelSource onnx_data(const void* ptr, size_t size){ 40 | ModelSource output; 41 | output.onnx_data_ = ptr; 42 | output.onnx_data_size_ = size; 43 | output.type_ = ModelSourceType::OnnXData; 44 | return output; 45 | } 46 | 47 | private: 48 | std::string onnxmodel_; 49 | const void* onnx_data_ = nullptr; 50 | size_t onnx_data_size_ = 0; 51 | ModelSourceType type_; 52 | }; 53 | 54 | enum class CompileOutputType : int{ 55 | File, 56 | Memory 57 | }; 58 | 59 | class CompileOutput{ 60 | public: 61 | CompileOutput(CompileOutputType type = CompileOutputType::Memory); 62 | CompileOutput(const std::string& file); 63 | CompileOutput(const char* file); 64 | void set_data(const std::vector& data); 65 | void set_data(std::vector&& data); 66 | 67 | const std::vector& data() const{return data_;}; 68 | CompileOutputType type() const{return type_;} 69 | std::string file() const{return file_;} 70 | 71 | private: 72 | CompileOutputType type_ = CompileOutputType::Memory; 73 | std::vector data_; 74 | std::string file_; 75 | }; 76 | 77 | class InputDims { 78 | public: 79 | InputDims() = default; 80 | 81 | // 当为-1时,保留导入时的网络结构尺寸 82 | InputDims(const std::initializer_list& dims); 83 | InputDims(const std::vector& dims); 84 | 85 | const std::vector& dims() const; 86 | 87 | private: 88 | std::vector dims_; 89 | }; 90 | 91 | enum class Mode : int { 92 | FP32, 93 | FP16, 94 | INT8 95 | }; 96 | 97 | const 
char* mode_string(Mode type); 98 | 99 | void set_layer_hook_reshape(const LayerHookFuncReshape& func); 100 | 101 | /** 当处于INT8模式时,int8process必须制定 102 | int8ImageDirectory和int8EntropyCalibratorFile指定一个即可 103 | 如果初次生成,指定了int8EntropyCalibratorFile,calibrator会保存到int8EntropyCalibratorFile指定的文件 104 | 如果已经生成过,指定了int8EntropyCalibratorFile,calibrator会从int8EntropyCalibratorFile指定的文件加载,而不是 105 | 从int8ImageDirectory读取图片再重新生成 106 | 当处于FP32或者FP16时,int8process、int8ImageDirectory、int8EntropyCalibratorFile都不需要指定 107 | 对于嵌入式设备,请把maxWorkspaceSize设置小一点,比如128MB = 1ul << 27 108 | **/ 109 | bool compile( 110 | Mode mode, 111 | unsigned int maxBatchSize, 112 | const ModelSource& source, 113 | const CompileOutput& saveto, 114 | const std::vector inputsDimsSetup = {}, 115 | Int8Process int8process = nullptr, 116 | const std::string& int8ImageDirectory = "", 117 | const std::string& int8EntropyCalibratorFile = "", 118 | const size_t maxWorkspaceSize = 2ul << 30 // 1ul << 30 = 1GB 119 | ); 120 | }; 121 | 122 | #endif //TRT_BUILDER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/cuda_tools.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * 系统关于CUDA的功能函数 4 | */ 5 | 6 | 7 | #include "cuda_tools.hpp" 8 | 9 | namespace CUDATools{ 10 | bool check_driver(CUresult e, const char* call, int line, const char *file) { 11 | if (e != CUDA_SUCCESS) { 12 | 13 | const char* message = nullptr; 14 | const char* name = nullptr; 15 | cuGetErrorString(e, &message); 16 | cuGetErrorName(e, &name); 17 | INFOE("CUDA Driver error %s # %s, code = %s [ %d ] in file %s:%d", call, message, name, e, file, line); 18 | return false; 19 | } 20 | return true; 21 | } 22 | 23 | bool check_runtime(cudaError_t e, const char* call, int line, const char *file){ 24 | if (e != cudaSuccess) { 25 | INFOE("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line); 26 | return false; 27 | } 28 | return true; 29 | } 30 | 31 | bool check_device_id(int device_id){ 32 | int device_count = -1; 33 | checkCudaRuntime(cudaGetDeviceCount(&device_count)); 34 | if(device_id < 0 || device_id >= device_count){ 35 | INFOE("Invalid device id: %d, count = %d", device_id, device_count); 36 | return false; 37 | } 38 | return true; 39 | } 40 | 41 | int current_device_id(){ 42 | int device_id = 0; 43 | checkCudaRuntime(cudaGetDevice(&device_id)); 44 | return device_id; 45 | } 46 | 47 | dim3 grid_dims(int numJobs) { 48 | int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS; 49 | return dim3(((numJobs + numBlockThreads - 1) / (float)numBlockThreads)); 50 | } 51 | 52 | dim3 block_dims(int numJobs) { 53 | return numJobs < GPU_BLOCK_THREADS ? 
numJobs : GPU_BLOCK_THREADS; 54 | } 55 | 56 | std::string device_capability(int device_id){ 57 | cudaDeviceProp prop; 58 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 59 | return iLogger::format("%d.%d", prop.major, prop.minor); 60 | } 61 | 62 | std::string device_name(int device_id){ 63 | cudaDeviceProp prop; 64 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 65 | return prop.name; 66 | } 67 | 68 | std::string device_description(){ 69 | 70 | cudaDeviceProp prop; 71 | size_t free_mem, total_mem; 72 | int device_id = 0; 73 | 74 | checkCudaRuntime(cudaGetDevice(&device_id)); 75 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 76 | checkCudaRuntime(cudaMemGetInfo(&free_mem, &total_mem)); 77 | 78 | return iLogger::format( 79 | "[ID %d]<%s>[arch %d.%d][GMEM %.2f GB/%.2f GB]", 80 | device_id, prop.name, prop.major, prop.minor, 81 | free_mem / 1024.0f / 1024.0f / 1024.0f, 82 | total_mem / 1024.0f / 1024.0f / 1024.0f 83 | ); 84 | } 85 | 86 | AutoDevice::AutoDevice(int device_id){ 87 | 88 | cudaGetDevice(&old_); 89 | checkCudaRuntime(cudaSetDevice(device_id)); 90 | } 91 | 92 | AutoDevice::~AutoDevice(){ 93 | checkCudaRuntime(cudaSetDevice(old_)); 94 | } 95 | } -------------------------------------------------------------------------------- /src/tensorRT/common/cuda_tools.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_TOOLS_HPP 2 | #define CUDA_TOOLS_HPP 3 | 4 | /* 5 | * 系统关于CUDA的功能函数 6 | */ 7 | 8 | #include 9 | #include 10 | #include "ilogger.hpp" 11 | 12 | #define GPU_BLOCK_THREADS 512 13 | 14 | 15 | #define KernelPositionBlock \ 16 | int position = (blockDim.x * blockIdx.x + threadIdx.x); \ 17 | if (position >= (edge)) return; 18 | 19 | 20 | #define checkCudaDriver(call) CUDATools::check_driver(call, #call, __LINE__, __FILE__) 21 | #define checkCudaRuntime(call) CUDATools::check_runtime(call, #call, __LINE__, __FILE__) 22 | 23 | #define checkCudaKernel(...) \ 24 | __VA_ARGS__; \ 25 | do{cudaError_t cudaStatus = cudaPeekAtLastError(); \ 26 | if (cudaStatus != cudaSuccess){ \ 27 | INFOE("launch failed: %s", cudaGetErrorString(cudaStatus)); \ 28 | }} while(0); 29 | 30 | 31 | #define Assert(op) \ 32 | do{ \ 33 | bool cond = !(!(op)); \ 34 | if(!cond){ \ 35 | INFOF("Assert failed, " #op); \ 36 | } \ 37 | }while(false) 38 | 39 | 40 | struct CUctx_st; 41 | struct CUstream_st; 42 | 43 | typedef CUstream_st* ICUStream; 44 | typedef CUctx_st* ICUContext; 45 | typedef void* ICUDeviceptr; 46 | typedef int DeviceID; 47 | 48 | namespace CUDATools{ 49 | bool check_driver(CUresult e, const char* call, int iLine, const char *szFile); 50 | bool check_runtime(cudaError_t e, const char* call, int iLine, const char *szFile); 51 | bool check_device_id(int device_id); 52 | int current_device_id(); 53 | 54 | dim3 grid_dims(int numJobs); 55 | dim3 block_dims(int numJobs); 56 | 57 | // return 8.6 etc. 
58 | std::string device_capability(int device_id); 59 | std::string device_name(int device_id); 60 | std::string device_description(); 61 | 62 | class AutoDevice{ 63 | public: 64 | AutoDevice(int device_id = 0); 65 | virtual ~AutoDevice(); 66 | 67 | private: 68 | int old_ = -1; 69 | }; 70 | } 71 | 72 | 73 | #endif // CUDA_TOOLS_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/ilogger.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef ILOGGER_HPP 3 | #define ILOGGER_HPP 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | #if defined(_WIN32) 13 | # define U_OS_WINDOWS 14 | #else 15 | # define U_OS_LINUX 16 | #endif 17 | 18 | 19 | namespace iLogger{ 20 | 21 | using namespace std; 22 | 23 | enum class LogLevel : int{ 24 | Debug = 5, 25 | Verbose = 4, 26 | Info = 3, 27 | Warning = 2, 28 | Error = 1, 29 | Fatal = 0 30 | }; 31 | 32 | #define INFOD(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Debug, __VA_ARGS__) 33 | #define INFOV(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Verbose, __VA_ARGS__) 34 | #define INFO(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Info, __VA_ARGS__) 35 | #define INFOW(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Warning, __VA_ARGS__) 36 | #define INFOE(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Error, __VA_ARGS__) 37 | #define INFOF(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Fatal, __VA_ARGS__) 38 | 39 | string date_now(); 40 | string time_now(); 41 | string gmtime_now(); 42 | string gmtime(time_t t); 43 | time_t gmtime2ctime(const string& gmt); 44 | void sleep(int ms); 45 | 46 | bool isfile(const string& file); 47 | bool mkdir(const string& path); 48 | bool mkdirs(const string& path); 49 | bool delete_file(const string& path); 50 | bool rmtree(const string& directory, bool ignore_fail=false); 51 | bool exists(const string& path); 52 | string format(const char* fmt, ...); 53 | FILE* fopen_mkdirs(const string& path, const string& mode); 54 | string file_name(const string& path, bool include_suffix=true); 55 | string directory(const string& path); 56 | long long timestamp_now(); 57 | double timestamp_now_float(); 58 | time_t last_modify(const string& file); 59 | vector load_file(const string& file); 60 | string load_text_file(const string& file); 61 | size_t file_size(const string& file); 62 | 63 | bool begin_with(const string& str, const string& with); 64 | bool end_with(const string& str, const string& with); 65 | vector split_string(const string& str, const std::string& spstr); 66 | string replace_string(const string& str, const string& token, const string& value, int nreplace=-1, int* out_num_replace=nullptr); 67 | 68 | // h[0-1], s[0-1], v[0-1] 69 | // return, 0-255, 0-255, 0-255 70 | tuple hsv2rgb(float h, float s, float v); 71 | tuple random_color(int id); 72 | 73 | // abcdefg.pnga *.png > false 74 | // abcdefg.png *.png > true 75 | // abcdefg.png a?cdefg.png > true 76 | bool pattern_match(const char* str, const char* matcher, bool igrnoe_case = true); 77 | vector find_files( 78 | const string& directory, 79 | const string& filter = "*", bool findDirectory = false, bool includeSubDirectory = false); 80 | 81 | string align_blank(const string& input, int align_size, char blank=' '); 82 | bool save_file(const string& file, const vector& data, bool mk_dirs = true); 83 | bool save_file(const string& file, const string& data, 
bool mk_dirs = true); 84 | bool save_file(const string& file, const void* data, size_t length, bool mk_dirs = true); 85 | 86 | // 捕获:SIGINT(2)、SIGQUIT(3) 87 | int while_loop(); 88 | 89 | // 关于logger的api 90 | const char* level_string(LogLevel level); 91 | void set_logger_save_directory(const string& loggerDirectory); 92 | 93 | void set_log_level(LogLevel level); 94 | LogLevel get_log_level(); 95 | void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...); 96 | void destroy_logger(); 97 | 98 | string base64_decode(const string& base64); 99 | string base64_encode(const void* data, size_t size); 100 | 101 | inline int upbound(int n, int align = 32){return (n + align - 1) / align * align;} 102 | string join_dims(const vector& dims); 103 | }; 104 | 105 | 106 | #endif // ILOGGER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/infer_controller.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INFER_CONTROLLER_HPP 2 | #define INFER_CONTROLLER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "monopoly_allocator.hpp" 13 | 14 | template, class JobAdditional=int> 15 | class InferController{ 16 | public: 17 | struct Job{ 18 | Input input; 19 | Output output; 20 | JobAdditional additional; 21 | MonopolyAllocator::MonopolyDataPointer mono_tensor; 22 | std::shared_ptr> pro; 23 | }; 24 | 25 | virtual ~InferController(){ 26 | stop(); 27 | } 28 | 29 | void stop(){ 30 | run_ = false; 31 | cond_.notify_all(); 32 | 33 | ////////////////////////////////////////// cleanup jobs 34 | { 35 | std::unique_lock l(jobs_lock_); 36 | while(!jobs_.empty()){ 37 | auto& item = jobs_.front(); 38 | if(item.pro) 39 | item.pro->set_value(Output()); 40 | jobs_.pop(); 41 | } 42 | }; 43 | 44 | if(worker_){ 45 | worker_->join(); 46 | worker_.reset(); 47 | } 48 | } 49 | 50 | bool startup(const StartParam& param){ 51 | run_ = true; 52 | 53 | std::promise pro; 54 | start_param_ = param; 55 | worker_ = std::make_shared(&InferController::worker, this, std::ref(pro)); 56 | return pro.get_future().get(); 57 | } 58 | 59 | virtual std::shared_future commit(const Input& input){ 60 | 61 | Job job; 62 | job.pro = std::make_shared>(); 63 | if(!preprocess(job, input)){ 64 | job.pro->set_value(Output()); 65 | return job.pro->get_future(); 66 | } 67 | 68 | /////////////////////////////////////////////////////////// 69 | { 70 | std::unique_lock l(jobs_lock_); 71 | jobs_.push(job); 72 | }; 73 | cond_.notify_one(); 74 | return job.pro->get_future(); 75 | } 76 | 77 | virtual std::vector> commits(const std::vector& inputs){ 78 | 79 | int batch_size = std::min((int)inputs.size(), this->tensor_allocator_->capacity()); 80 | std::vector jobs(inputs.size()); 81 | std::vector> results(inputs.size()); 82 | 83 | int nepoch = (inputs.size() + batch_size - 1) / batch_size; 84 | for(int epoch = 0; epoch < nepoch; ++epoch){ 85 | int begin = epoch * batch_size; 86 | int end = std::min((int)inputs.size(), begin + batch_size); 87 | 88 | for(int i = begin; i < end; ++i){ 89 | Job& job = jobs[i]; 90 | job.pro = std::make_shared>(); 91 | if(!preprocess(job, inputs[i])){ 92 | job.pro->set_value(Output()); 93 | } 94 | results[i] = job.pro->get_future(); 95 | } 96 | 97 | /////////////////////////////////////////////////////////// 98 | { 99 | std::unique_lock l(jobs_lock_); 100 | for(int i = begin; i < end; ++i){ 101 | jobs_.emplace(std::move(jobs[i])); 102 | }; 103 | } 
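            // Wake the worker thread so it can consume the batch of jobs that was just queued under the lock.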
104 | cond_.notify_one(); 105 | } 106 | return results; 107 | } 108 | 109 | protected: 110 | virtual void worker(std::promise& result) = 0; 111 | virtual bool preprocess(Job& job, const Input& input) = 0; 112 | 113 | virtual bool get_jobs_and_wait(std::vector& fetch_jobs, int max_size){ 114 | 115 | std::unique_lock l(jobs_lock_); 116 | cond_.wait(l, [&](){ 117 | return !run_ || !jobs_.empty(); 118 | }); 119 | 120 | if(!run_) return false; 121 | 122 | fetch_jobs.clear(); 123 | for(int i = 0; i < max_size && !jobs_.empty(); ++i){ 124 | fetch_jobs.emplace_back(std::move(jobs_.front())); 125 | jobs_.pop(); 126 | } 127 | return true; 128 | } 129 | 130 | virtual bool get_job_and_wait(Job& fetch_job){ 131 | 132 | std::unique_lock l(jobs_lock_); 133 | cond_.wait(l, [&](){ 134 | return !run_ || !jobs_.empty(); 135 | }); 136 | 137 | if(!run_) return false; 138 | 139 | fetch_job = std::move(jobs_.front()); 140 | jobs_.pop(); 141 | return true; 142 | } 143 | 144 | protected: 145 | StartParam start_param_; 146 | std::atomic run_; 147 | std::mutex jobs_lock_; 148 | std::queue jobs_; 149 | std::shared_ptr worker_; 150 | std::condition_variable cond_; 151 | std::shared_ptr> tensor_allocator_; 152 | }; 153 | 154 | #endif // INFER_CONTROLLER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/monopoly_allocator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * 独占分配器 3 | * 用以解决以下问题: 4 | * 1. 实现tensor复用的问题 5 | * 2. 对于tensor使用的两个阶段实现并行,时间重叠 6 | * 阶段一:预处理准备 7 | * 阶段二:模型推理 8 | * 9 | * 设计思路: 10 | * 以海底捞吃火锅为类比,座位分为两种:堂内吃饭的座位、厅外等候的座位 11 | * 12 | * 1. 初始状态,堂内有10个座位,厅外有10个座位,全部空 13 | * 2. 来了30个人吃火锅 14 | * 3. 流程是,先安排10个人坐在厅外修整,20人个人排队等候 15 | * 4. 由于堂内没人,所以调度坐在厅外的10个人进入堂内,开始吃火锅。厅外的10个座位为空 16 | * 5. 由于厅外没人,所以可以让排队的20人中,取10个人在厅外修整 17 | * 6. 此时状态为,堂内10人,厅外10人,等候10人 18 | * 7. 
经过60分钟后,堂内10人吃完,紧接着执行步骤4 19 | * 20 | * 在实际工作中,通常图像输入过程有预处理、推理 21 | * 我们的目的是让预处理和推理时间进行重叠。因此设计了一个缓冲区,类似厅外等候区的那种形式 22 | * 当我们输入图像时,具有2倍batch的空间进行预处理用于缓存 23 | * 而引擎推理时,每次拿1个batch的数据进行推理 24 | * 当引擎推理速度慢而预处理速度快时,输入图像势必需要进行等候。否则缓存队列会越来越大 25 | * 而这里提到的几个点就是设计的主要目标 26 | **/ 27 | 28 | #ifndef MONOPOLY_ALLOCATOR_HPP 29 | #define MONOPOLY_ALLOCATOR_HPP 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | template 37 | class MonopolyAllocator{ 38 | public: 39 | /* Data是数据容器类 40 | 允许query获取的item执行item->release释放自身所有权,该对象可以被复用 41 | 通过item->data()获取储存的对象的指针 42 | */ 43 | class MonopolyData{ 44 | public: 45 | std::shared_ptr<_ItemType>& data(){ return data_; } 46 | void release(){manager_->release_one(this);} 47 | 48 | private: 49 | MonopolyData(MonopolyAllocator* pmanager){manager_ = pmanager;} 50 | 51 | private: 52 | friend class MonopolyAllocator; 53 | MonopolyAllocator* manager_ = nullptr; 54 | std::shared_ptr<_ItemType> data_; 55 | bool available_ = true; 56 | }; 57 | typedef std::shared_ptr MonopolyDataPointer; 58 | 59 | MonopolyAllocator(int size){ 60 | capacity_ = size; 61 | num_available_ = size; 62 | datas_.resize(size); 63 | 64 | for(int i = 0; i < size; ++i) 65 | datas_[i] = std::shared_ptr(new MonopolyData(this)); 66 | } 67 | 68 | virtual ~MonopolyAllocator(){ 69 | run_ = false; 70 | cv_.notify_all(); 71 | 72 | std::unique_lock l(lock_); 73 | cv_exit_.wait(l, [&](){ 74 | return num_wait_thread_ == 0; 75 | }); 76 | } 77 | 78 | /* 获取一个可用的对象 79 | timeout:超时时间,如果没有可用的对象,将会进入阻塞等待,如果等待超时则返回空指针 80 | 请求得到一个对象后,该对象被占用,除非他执行了release释放该对象所有权 81 | */ 82 | MonopolyDataPointer query(int timeout = 10000){ 83 | 84 | std::unique_lock l(lock_); 85 | if(!run_) return nullptr; 86 | 87 | if(num_available_ == 0){ 88 | num_wait_thread_++; 89 | 90 | auto state = cv_.wait_for(l, std::chrono::milliseconds(timeout), [&](){ 91 | return num_available_ > 0 || !run_; 92 | }); 93 | 94 | num_wait_thread_--; 95 | cv_exit_.notify_one(); 96 | 97 | // timeout, no available, exit program 98 | if(!state || num_available_ == 0 || !run_) 99 | return nullptr; 100 | } 101 | 102 | auto item = std::find_if(datas_.begin(), datas_.end(), [](MonopolyDataPointer& item){return item->available_;}); 103 | if(item == datas_.end()) 104 | return nullptr; 105 | 106 | (*item)->available_ = false; 107 | num_available_--; 108 | return *item; 109 | } 110 | 111 | int num_available(){ 112 | return num_available_; 113 | } 114 | 115 | int capacity(){ 116 | return capacity_; 117 | } 118 | 119 | private: 120 | void release_one(MonopolyData* prq){ 121 | std::unique_lock l(lock_); 122 | if(!prq->available_){ 123 | prq->available_ = true; 124 | num_available_++; 125 | cv_.notify_one(); 126 | } 127 | } 128 | 129 | private: 130 | std::mutex lock_; 131 | std::condition_variable cv_; 132 | std::condition_variable cv_exit_; 133 | std::vector datas_; 134 | int capacity_ = 0; 135 | volatile int num_available_ = 0; 136 | volatile int num_wait_thread_ = 0; 137 | volatile bool run_ = true; 138 | }; 139 | 140 | #endif // MONOPOLY_ALLOCATOR_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/preprocess_kernel.cuh: -------------------------------------------------------------------------------- 1 | #ifndef PREPROCESS_KERNEL_CUH 2 | #define PREPROCESS_KERNEL_CUH 3 | 4 | #include "cuda_tools.hpp" 5 | 6 | namespace CUDAKernel{ 7 | 8 | enum class NormType : int{ 9 | None = 0, 10 | MeanStd = 1, 11 | AlphaBeta = 2 12 | }; 13 | 14 | enum class ChannelType : int{ 15 | None = 0, 16 | Invert = 1 17 | }; 18 | 19 | 
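    // Usage sketch (illustrative only, not the actual FastBEV configuration): how a Norm is
    // typically built and handed to resize_bilinear_and_normalize() declared further below.
    // The mean/std values and the src_device/dst_device/stream variables are placeholders.
    //
    //   float mean[3] = {0.485f, 0.456f, 0.406f};
    //   float std [3] = {0.229f, 0.224f, 0.225f};
    //   auto norm = Norm::mean_std(mean, std, 1/255.0f, ChannelType::Invert); // channel invert (e.g. BGR -> RGB), out = (x/255 - mean)/std
    //   resize_bilinear_and_normalize(
    //       src_device, src_width * 3, src_width, src_height,   // src_line_size = width * 3 channels
    //       dst_device, dst_width, dst_height,
    //       norm, stream);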
struct Norm{ 20 | float mean[3]; 21 | float std[3]; 22 | float alpha, beta; 23 | NormType type = NormType::None; 24 | ChannelType channel_type = ChannelType::None; 25 | 26 | // out = (x * alpha - mean) / std 27 | static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None); 28 | 29 | // out = x * alpha + beta 30 | static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None); 31 | 32 | // None 33 | static Norm None(); 34 | }; 35 | 36 | void resize_bilinear_and_normalize( 37 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 38 | const Norm& norm, 39 | cudaStream_t stream); 40 | 41 | void warp_affine_bilinear_and_normalize_plane( 42 | uint8_t* src, int src_line_size, int src_width, int src_height, 43 | float* dst , int dst_width, int dst_height, 44 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 45 | cudaStream_t stream); 46 | 47 | void warp_affine_bilinear_and_normalize_focus( 48 | uint8_t* src, int src_line_size, int src_width, int src_height, 49 | float* dst , int dst_width, int dst_height, 50 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 51 | cudaStream_t stream); 52 | 53 | // 可以用来图像校正、图像旋转等等 (测试比cpu快10倍以上) 54 | // 使用示范: 55 | // float* matrix_3_3 = nullptr; 56 | // size_t matrix_bytes = 3 * 3 * sizeof(f32); 57 | // checkCudaRuntime(cudaMalloc(&matrix_3_3, matrix_bytes)); 58 | // checkCudaRuntime(cudaMemset(matrix_3_3, 0, matrix_bytes)); 59 | // 60 | // #左上、右上、右下、左下 原图像四个点的坐标 61 | // cv::Point2f src_points[] = { 62 | // vctvctPoints[nImageIdx][0], 63 | // vctvctPoints[nImageIdx][1], 64 | // vctvctPoints[nImageIdx][2], 65 | // vctvctPoints[nImageIdx][3]}; 66 | // 67 | // #左上、右上、左下、右下(Z 字形排列) 目标图像四个点的坐标 68 | // cv::Point2f dst_points[] = { 69 | // cv::Point2f(0, 0), 70 | // cv::Point2f(nw-1, 0), 71 | // cv::Point2f(0, nh-1), 72 | // cv::Point2f(nw-1, nh-1) }; 73 | // 利用opencv 得到变换矩阵 dst -> src 的 矩阵 74 | // cv::Mat Perspect_Matrix = cv::getPerspectiveTransform(dst_points, src_points); 75 | // Perspect_Matrix.convertTo(Perspect_Matrix, CV_32FC1); 76 | // 拷贝到 gpu 77 | // checkCudaRuntime(cudaMemcpy(matrix_3_3, Perspect_Matrix.data, matrix_bytes, cudaMemcpyHostToDevice)); 78 | void warp_perspective( 79 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 80 | float* matrix_3_3, uint8_t const_value, const Norm& norm, cudaStream_t stream 81 | ); 82 | 83 | void norm_feature( 84 | float* feature_array, int num_feature, int feature_length, 85 | cudaStream_t stream 86 | ); 87 | 88 | // void convert_nv12_to_bgr_invoke( 89 | // const uint8_t* y, const uint8_t* uv, int width, int height, 90 | // int linesize, uint8_t* dst, 91 | // cudaStream_t stream); 92 | 93 | // void convert_yuv422_to_bgr_invoke( 94 | // const uint8_t* y, const uint8_t* uv, int width, int height, 95 | // int linesize, uint8_t* dst, 96 | // cudaStream_t stream); 97 | 98 | void gpuConvertYUYVtoRGB( 99 | const uint8_t* src, uint8_t* dst,int width, int height, cudaStream_t stream); 100 | }; 101 | 102 | #endif // PREPROCESS_KERNEL_CUH -------------------------------------------------------------------------------- /src/tensorRT/common/trt_tensor.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TRT_TENSOR_HPP 3 | #define TRT_TENSOR_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | struct CUstream_st; 12 | typedef 
CUstream_st CUStreamRaw; 13 | 14 | #define CURRENT_DEVICE_ID -1 15 | 16 | namespace TRT { 17 | 18 | typedef struct{unsigned short _;} float16; 19 | typedef CUStreamRaw* CUStream; 20 | 21 | enum class DataHead : int{ 22 | Init = 0, 23 | Device = 1, 24 | Host = 2 25 | }; 26 | 27 | enum class DataType : int { 28 | Unknow = -1, 29 | Float = 0, 30 | Float16 = 1, 31 | Int32 = 2, 32 | UInt8 = 3 33 | }; 34 | 35 | float float16_to_float(float16 value); 36 | float16 float_to_float16(float value); 37 | int data_type_size(DataType dt); 38 | const char* data_head_string(DataHead dh); 39 | const char* data_type_string(DataType dt); 40 | 41 | class MixMemory { 42 | public: 43 | MixMemory(int device_id = CURRENT_DEVICE_ID); 44 | MixMemory(void* cpu, size_t cpu_size, void* gpu, size_t gpu_size); 45 | virtual ~MixMemory(); 46 | void* gpu(size_t size); 47 | void* cpu(size_t size); 48 | void release_gpu(); 49 | void release_cpu(); 50 | void release_all(); 51 | 52 | inline bool owner_gpu() const{return owner_gpu_;} 53 | inline bool owner_cpu() const{return owner_cpu_;} 54 | 55 | inline size_t cpu_size() const{return cpu_size_;} 56 | inline size_t gpu_size() const{return gpu_size_;} 57 | inline int device_id() const{return device_id_;} 58 | 59 | inline void* gpu() const { return gpu_; } 60 | 61 | // Pinned Memory 62 | inline void* cpu() const { return cpu_; } 63 | 64 | void reference_data(void* cpu, size_t cpu_size, void* gpu, size_t gpu_size); 65 | 66 | private: 67 | void* cpu_ = nullptr; 68 | size_t cpu_size_ = 0; 69 | bool owner_cpu_ = true; 70 | int device_id_ = 0; 71 | 72 | void* gpu_ = nullptr; 73 | size_t gpu_size_ = 0; 74 | bool owner_gpu_ = true; 75 | }; 76 | 77 | class Tensor { 78 | public: 79 | Tensor(const Tensor& other) = delete; 80 | Tensor& operator = (const Tensor& other) = delete; 81 | 82 | explicit Tensor(DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 83 | explicit Tensor(int n, int c, int h, int w, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 84 | explicit Tensor(int ndims, const int* dims, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 85 | explicit Tensor(const std::vector& dims, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 86 | virtual ~Tensor(); 87 | 88 | int numel() const; 89 | inline int ndims() const{return shape_.size();} 90 | inline int size(int index) const{return shape_[index];} 91 | inline int shape(int index) const{return shape_[index];} 92 | 93 | inline int batch() const{return shape_[0];} 94 | inline int channel() const{return shape_[1];} 95 | inline int height() const{return shape_[2];} 96 | inline int width() const{return shape_[3];} 97 | 98 | inline DataType type() const { return dtype_; } 99 | inline const std::vector& dims() const { return shape_; } 100 | inline const std::vector& strides() const {return strides_;} 101 | inline int bytes() const { return bytes_; } 102 | inline int bytes(int start_axis) const { return count(start_axis) * element_size(); } 103 | inline int element_size() const { return data_type_size(dtype_); } 104 | inline DataHead head() const { return head_; } 105 | 106 | std::shared_ptr clone() const; 107 | Tensor& release(); 108 | Tensor& set_to(float value); 109 | bool empty() const; 110 | 111 | template 112 | int offset(int index, _Args ... 
index_args) const{ 113 | const int index_array[] = {index, index_args...}; 114 | return offset_array(sizeof...(index_args) + 1, index_array); 115 | } 116 | 117 | int offset_array(const std::vector& index) const; 118 | int offset_array(size_t size, const int* index_array) const; 119 | 120 | template 121 | Tensor& resize(int dim_size, _Args ... dim_size_args){ 122 | const int dim_size_array[] = {dim_size, dim_size_args...}; 123 | return resize(sizeof...(dim_size_args) + 1, dim_size_array); 124 | } 125 | 126 | Tensor& resize(int ndims, const int* dims); 127 | Tensor& resize(const std::vector& dims); 128 | Tensor& resize_single_dim(int idim, int size); 129 | int count(int start_axis = 0) const; 130 | int device() const{return device_id_;} 131 | 132 | Tensor& to_gpu(bool copy=true); 133 | Tensor& to_cpu(bool copy=true); 134 | 135 | Tensor& to_half(); 136 | Tensor& to_float(); 137 | inline void* cpu() const { ((Tensor*)this)->to_cpu(); return data_->cpu(); } 138 | inline void* gpu() const { ((Tensor*)this)->to_gpu(); return data_->gpu(); } 139 | 140 | template inline const DType* cpu() const { return (DType*)cpu(); } 141 | template inline DType* cpu() { return (DType*)cpu(); } 142 | 143 | template 144 | inline DType* cpu(int i, _Args&& ... args) { return cpu() + offset(i, args...); } 145 | 146 | 147 | template inline const DType* gpu() const { return (DType*)gpu(); } 148 | template inline DType* gpu() { return (DType*)gpu(); } 149 | 150 | template 151 | inline DType* gpu(int i, _Args&& ... args) { return gpu() + offset(i, args...); } 152 | 153 | 154 | template 155 | inline DType& at(int i, _Args&& ... args) { return *(cpu() + offset(i, args...)); } 156 | 157 | std::shared_ptr get_data() const {return data_;} 158 | std::shared_ptr get_workspace() const {return workspace_;} 159 | Tensor& set_workspace(std::shared_ptr workspace) {workspace_ = workspace; return *this;} 160 | 161 | bool is_stream_owner() const {return stream_owner_;} 162 | CUStream get_stream() const{return stream_;} 163 | Tensor& set_stream(CUStream stream, bool owner=false){stream_ = stream; stream_owner_ = owner; return *this;} 164 | 165 | Tensor& set_mat (int n, const cv::Mat& image); 166 | Tensor& set_norm_mat(int n, const cv::Mat& image, float mean[3], float std[3]); 167 | cv::Mat at_mat(int n = 0, int c = 0) { return cv::Mat(height(), width(), CV_32F, cpu(n, c)); } 168 | 169 | Tensor& synchronize(); 170 | const char* shape_string() const{return shape_string_;} 171 | const char* descriptor() const; 172 | 173 | Tensor& copy_from_gpu(size_t offset, const void* src, size_t num_element, int device_id = CURRENT_DEVICE_ID); 174 | Tensor& copy_from_cpu(size_t offset, const void* src, size_t num_element); 175 | 176 | void reference_data(const std::vector& shape, void* cpu_data, size_t cpu_size, void* gpu_data, size_t gpu_size, DataType dtype); 177 | 178 | /** 179 | 180 | # 以下代码是python中加载Tensor 181 | import numpy as np 182 | 183 | def load_tensor(file): 184 | 185 | with open(file, "rb") as f: 186 | binary_data = f.read() 187 | 188 | magic_number, ndims, dtype = np.frombuffer(binary_data, np.uint32, count=3, offset=0) 189 | assert magic_number == 0xFCCFE2E2, f"{file} not a tensor file." 
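            # the header is three uint32 words (magic, ndims, dtype); the dims follow immediately after it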
190 | 191 | dims = np.frombuffer(binary_data, np.uint32, count=ndims, offset=3 * 4) 192 | 193 | if dtype == 0: 194 | np_dtype = np.float32 195 | elif dtype == 1: 196 | np_dtype = np.float16 197 | else: 198 | assert False, f"Unsupport dtype = {dtype}, can not convert to numpy dtype" 199 | 200 | return np.frombuffer(binary_data, np_dtype, offset=(ndims + 3) * 4).reshape(*dims) 201 | 202 | **/ 203 | bool save_to_file(const std::string& file) const; 204 | bool load_from_file(const std::string& file); 205 | 206 | private: 207 | Tensor& compute_shape_string(); 208 | Tensor& adajust_memory_by_update_dims_or_type(); 209 | void setup_data(std::shared_ptr data); 210 | 211 | private: 212 | std::vector shape_; 213 | std::vector strides_; 214 | size_t bytes_ = 0; 215 | DataHead head_ = DataHead::Init; 216 | DataType dtype_ = DataType::Float; 217 | CUStream stream_ = nullptr; 218 | bool stream_owner_ = false; 219 | int device_id_ = 0; 220 | char shape_string_[100]; 221 | char descriptor_string_[100]; 222 | std::shared_ptr data_; 223 | std::shared_ptr workspace_; 224 | }; 225 | }; 226 | 227 | #endif // TRT_TENSOR_HPP -------------------------------------------------------------------------------- /src/tensorRT/import_lib.cpp: -------------------------------------------------------------------------------- 1 |  2 | #if defined(_WIN32) 3 | # define U_OS_WINDOWS 4 | #else 5 | # define U_OS_LINUX 6 | #endif 7 | 8 | #ifdef U_OS_WINDOWS 9 | #if defined(_DEBUG) 10 | # pragma comment(lib, "opencv_world346d.lib") 11 | #else 12 | # pragma comment(lib, "opencv_world346.lib") 13 | #endif 14 | 15 | //导入cuda 16 | #pragma comment(lib, "cuda.lib") 17 | #pragma comment(lib, "cudart.lib") 18 | #pragma comment(lib, "cublas.lib") 19 | #pragma comment(lib, "cudnn.lib") 20 | 21 | //导入tensorRT 22 | #pragma comment(lib, "nvinfer.lib") 23 | #pragma comment(lib, "nvinfer_plugin.lib") 24 | //#pragma comment(lib, "nvparsers.lib") 25 | 26 | #if defined(_DEBUG) 27 | #pragma comment(lib, "libprotobufd.lib") 28 | #else 29 | #pragma comment(lib, "libprotobuf.lib") 30 | #endif 31 | 32 | #ifdef HAS_PYTHON 33 | #pragma comment(lib, "python37.lib") 34 | #endif 35 | 36 | #endif // U_OS_WINDOWS -------------------------------------------------------------------------------- /src/tensorRT/infer/trt_infer.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_INFER_HPP 4 | #define TRT_INFER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace TRT { 13 | 14 | class Infer { 15 | public: 16 | virtual void forward(bool sync = true) = 0; 17 | virtual int get_max_batch_size() = 0; 18 | virtual void set_stream(CUStream stream) = 0; 19 | virtual CUStream get_stream() = 0; 20 | virtual void synchronize() = 0; 21 | virtual size_t get_device_memory_size() = 0; 22 | virtual std::shared_ptr get_workspace() = 0; 23 | virtual std::shared_ptr input (int index = 0) = 0; 24 | virtual std::shared_ptr output(int index = 0) = 0; 25 | virtual std::shared_ptr tensor(const std::string& name) = 0; 26 | virtual std::string get_input_name (int index = 0) = 0; 27 | virtual std::string get_output_name(int index = 0) = 0; 28 | virtual bool is_output_name(const std::string& name) = 0; 29 | virtual bool is_input_name (const std::string& name) = 0; 30 | virtual int num_output() = 0; 31 | virtual int num_input() = 0; 32 | virtual void print() = 0; 33 | virtual int device() = 0; 34 | virtual void set_input (int index, std::shared_ptr tensor) = 0; 35 | virtual void set_output(int 
index, std::shared_ptr tensor) = 0; 36 | virtual std::shared_ptr> serial_engine() = 0; 37 | }; 38 | 39 | struct DeviceMemorySummary { 40 | size_t total; 41 | size_t available; 42 | }; 43 | 44 | DeviceMemorySummary get_current_device_summary(); 45 | int get_device_count(); 46 | int get_device(); 47 | 48 | void set_device(int device_id); 49 | std::shared_ptr load_infer_from_memory(const void* pdata, size_t size); 50 | std::shared_ptr load_infer(const std::string& file); 51 | bool init_nv_plugins(); 52 | 53 | }; //TRTInfer 54 | 55 | 56 | #endif //TRT_INFER_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnx/onnx_pb.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) ONNX Project Contributors. 2 | // Licensed under the MIT license. 3 | 4 | #ifndef ONNX_ONNX_PB_H 5 | #define ONNX_ONNX_PB_H 6 | 7 | // Defines ONNX_EXPORT and ONNX_IMPORT. On Windows, this corresponds to 8 | // different declarations (dllexport and dllimport). On Linux/Mac, it just 9 | // resolves to the same "default visibility" setting. 10 | #if defined(_MSC_VER) 11 | #if defined(ONNX_BUILD_SHARED_LIBS) || defined(ONNX_BUILD_MAIN_LIB) 12 | #define ONNX_EXPORT __declspec(dllexport) 13 | #define ONNX_IMPORT __declspec(dllimport) 14 | #else 15 | #define ONNX_EXPORT 16 | #define ONNX_IMPORT 17 | #endif 18 | #else 19 | #if defined(__GNUC__) 20 | #define ONNX_EXPORT __attribute__((__visibility__("default"))) 21 | #else 22 | #define ONNX_EXPORT 23 | #endif 24 | #define ONNX_IMPORT ONNX_EXPORT 25 | #endif 26 | 27 | // ONNX_API is a macro that, depends on whether you are building the 28 | // main ONNX library or not, resolves to either ONNX_EXPORT or 29 | // ONNX_IMPORT. 30 | // 31 | // This is used in e.g. ONNX's protobuf files: when building the main library, 32 | // it is defined as ONNX_EXPORT to fix a Windows global-variable-in-dll 33 | // issue, and for anyone dependent on ONNX it will be defined as 34 | // ONNX_IMPORT. ONNX_BUILD_MAIN_LIB can also be set when being built 35 | // statically if ONNX is being linked into a shared library that wants 36 | // to export the ONNX APIs and classes. 37 | // 38 | // More details on Windows dllimport / dllexport can be found at 39 | // https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx 40 | // 41 | // This solution is similar to 42 | // https://github.com/pytorch/pytorch/blob/master/caffe2/core/common.h 43 | #define ONNX_API 44 | #include "onnx-ml.pb.h" 45 | 46 | #endif // ! 
ONNX_ONNX_PB_H 47 | -------------------------------------------------------------------------------- /src/tensorRT/onnx/readme.md: -------------------------------------------------------------------------------- 1 | # ONNX 2 | - 这几个文件来自于对ONNX的编译后提取的结果,由protoc生成的cpp 3 | - https://github.com/onnx/onnx -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ImporterContext.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt.hpp" 8 | #include "onnx2trt_utils.hpp" 9 | #include "onnxErrorRecorder.hpp" 10 | // #include "onnx/common/stl_backports.h" 11 | #include 12 | #include 13 | 14 | namespace onnx2trt 15 | { 16 | 17 | class ErrorRecorderWrapper 18 | { 19 | public: 20 | ErrorRecorderWrapper(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) 21 | : mNetwork(network) 22 | , mLogger(logger) 23 | { 24 | if (mNetwork) 25 | { 26 | mUserErrorRecorder = mNetwork->getErrorRecorder(); 27 | mOnnxErrorRecorder = ONNXParserErrorRecorder::create(logger, mUserErrorRecorder); 28 | if (mOnnxErrorRecorder) 29 | { 30 | if (mUserErrorRecorder) 31 | { 32 | mUserErrorRecorder->incRefCount(); 33 | } 34 | mNetwork->setErrorRecorder(mOnnxErrorRecorder); 35 | } 36 | } 37 | } 38 | 39 | ~ErrorRecorderWrapper() 40 | { 41 | if (mNetwork && mOnnxErrorRecorder) 42 | { 43 | mNetwork->setErrorRecorder(mUserErrorRecorder); 44 | if (mUserErrorRecorder) 45 | { 46 | mUserErrorRecorder->decRefCount(); 47 | } 48 | ONNXParserErrorRecorder::destroy(mOnnxErrorRecorder); 49 | } 50 | } 51 | 52 | bool hasError() const 53 | { 54 | return mOnnxErrorRecorder != nullptr && mOnnxErrorRecorder->getNbErrors() != 0; 55 | } 56 | 57 | //! Return recorder used by hasError(). 58 | nvinfer1::IErrorRecorder* getErrorRecorder() const 59 | { 60 | return mOnnxErrorRecorder ? mOnnxErrorRecorder : nullptr; 61 | } 62 | private: 63 | nvinfer1::INetworkDefinition* mNetwork{nullptr}; 64 | nvinfer1::ILogger* mLogger{nullptr}; 65 | ONNXParserErrorRecorder* mOnnxErrorRecorder{nullptr}; 66 | nvinfer1::IErrorRecorder* mUserErrorRecorder{nullptr}; 67 | }; 68 | 69 | class ImporterContext final : public IImporterContext 70 | { 71 | nvinfer1::INetworkDefinition* mNetwork; 72 | nvinfer1::ILogger* mLogger; 73 | std::list> mTempBufs; 74 | StringMap mUserInputs; 75 | StringMap mUserOutputs; 76 | StringMap mOpsets; 77 | StringMap mTensors; // All tensors in the graph mapped to their names. 78 | StringMap mTensorLocations; 79 | StringMap mTensorRangeMins; 80 | StringMap mTensorRangeMaxes; 81 | StringMap mLayerPrecisions; 82 | std::set mTensorNames; // Keep track of how many times a tensor name shows up, to avoid duplicate naming in TRT. 83 | std::set mLayerNames; // Keep track of how many times a tensor name shows up, to avoid duplicate naming in TRT. 84 | int64_t mSuffixCounter{0}; // increasing suffix counter used to uniquify layer names. 85 | std::unordered_set mUnsupportedShapeTensors; // Container to hold output tensor names of layers that produce shape tensor outputs but do not natively support them. 86 | StringMap mLoopTensors; // Container to map subgraph tensors to their original outer graph names. 
87 | std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file 88 | std::unique_ptr mErrorWrapper; // error recorder to control TRT errors 89 | 90 | public: 91 | ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) 92 | : mNetwork(network) 93 | , mLogger(logger) 94 | // Disable ErrorRecorder for now due to incompatibilities with ONNXRT. 95 | // , mErrorWrapper(onnx::make_unique(mNetwork, logger)) 96 | , mErrorWrapper(nullptr) 97 | { 98 | } 99 | nvinfer1::INetworkDefinition* network() override 100 | { 101 | return mNetwork; 102 | } 103 | StringMap& tensors() override 104 | { 105 | return mTensors; 106 | } 107 | StringMap& tensorLocations() override 108 | { 109 | return mTensorLocations; 110 | } 111 | StringMap& tensorRangeMins() override 112 | { 113 | return mTensorRangeMins; 114 | } 115 | StringMap& tensorRangeMaxes() override 116 | { 117 | return mTensorRangeMaxes; 118 | } 119 | StringMap& layerPrecisions() override 120 | { 121 | return mLayerPrecisions; 122 | } 123 | std::unordered_set& unsupportedShapeTensors() override 124 | { 125 | return mUnsupportedShapeTensors; 126 | } 127 | StringMap& loopTensors() override 128 | { 129 | return mLoopTensors; 130 | } 131 | void setOnnxFileLocation(std::string location) override 132 | { 133 | mOnnxFileLocation = location; 134 | } 135 | std::string getOnnxFileLocation() override 136 | { 137 | return mOnnxFileLocation; 138 | } 139 | // This actually handles weights as well, but is named this way to be consistent with the tensors() 140 | void registerTensor(TensorOrWeights tensor, const std::string& basename) override 141 | { 142 | // TRT requires unique tensor names. 143 | const std::string uniqueName = generateUniqueName(mTensorNames, basename); 144 | 145 | if (tensor) 146 | { 147 | auto* ctx = this; // To enable logging. 148 | if (tensor.is_tensor()) 149 | { 150 | tensor.tensor().setName(uniqueName.c_str()); 151 | 152 | LOG_VERBOSE("Registering tensor: " << uniqueName << " for ONNX tensor: " << basename); 153 | } 154 | else if (tensor.is_weights()) 155 | { 156 | const auto& weights = tensor.weights(); 157 | if (tensor.weights().type == ::onnx::TensorProto::INT64) 158 | { 159 | tensor = ShapedWeights{::onnx::TensorProto::INT32, 160 | convertINT64(reinterpret_cast(weights.values), weights.shape, ctx), weights.shape}; 161 | } 162 | tensor.weights().setName(basename.c_str()); 163 | } 164 | 165 | } 166 | // Overwrite previous tensors registered with the same name (this only happens when there are subgraphs, 167 | // and in that case, overwriting is the desired behavior). 168 | this->tensors()[basename] = std::move(tensor); 169 | } 170 | 171 | void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) override 172 | { 173 | // No layer will be added for Constant nodes in ONNX. 174 | if (layer) 175 | { 176 | const std::string name = basename.empty() ? layer->getName() : basename; 177 | const std::string uniqueName = generateUniqueName(mLayerNames, name); 178 | 179 | auto* ctx = this; // To enable logging. 180 | LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename); 181 | 182 | layer->setName(uniqueName.c_str()); 183 | } 184 | } 185 | 186 | nvinfer1::ILogger& logger() override 187 | { 188 | return *mLogger; 189 | } 190 | 191 | ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) override 192 | { 193 | ShapedWeights weights(type, nullptr, shape); 194 | // Need special logic for handling scalars. 
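        // For a 0-D (scalar) shape, allocate exactly one element of the requested dtype
        // instead of relying on weights.size_bytes().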
195 | if (shape.nbDims == 0) 196 | { 197 | mTempBufs.push_back(std::vector(getDtypeSize(type), value)); 198 | } 199 | else 200 | { 201 | mTempBufs.push_back(std::vector(weights.size_bytes(), value)); 202 | } 203 | weights.values = mTempBufs.back().data(); 204 | return weights; 205 | } 206 | 207 | bool setUserInput(const char* name, nvinfer1::ITensor* input) 208 | { 209 | mUserInputs[name] = input; 210 | return true; 211 | } 212 | bool setUserOutput(const char* name, nvinfer1::ITensor** output) 213 | { 214 | mUserOutputs[name] = output; 215 | return true; 216 | } 217 | nvinfer1::ITensor* getUserInput(const char* name) 218 | { 219 | if (!mUserInputs.count(name)) 220 | { 221 | return nullptr; 222 | } 223 | else 224 | { 225 | return mUserInputs.at(name); 226 | } 227 | } 228 | nvinfer1::ITensor** getUserOutput(const char* name) 229 | { 230 | if (!mUserOutputs.count(name)) 231 | { 232 | return nullptr; 233 | } 234 | else 235 | { 236 | return mUserOutputs.at(name); 237 | } 238 | } 239 | StringMap const& getUserOutputs() const 240 | { 241 | return mUserOutputs; 242 | } 243 | void clearOpsets() 244 | { 245 | mOpsets.clear(); 246 | } 247 | void addOpset(std::string domain, int64_t version) 248 | { 249 | mOpsets.emplace(domain, version); 250 | } 251 | int64_t getOpsetVersion(const char* domain = "") const override 252 | { 253 | if (mOpsets.empty()) 254 | { 255 | return 1; 256 | } 257 | else if (mOpsets.size() == 1) 258 | { 259 | return mOpsets.begin()->second; 260 | } 261 | else 262 | { 263 | assert(mOpsets.count(domain)); 264 | return mOpsets.at(domain); 265 | } 266 | } 267 | bool hasError() const noexcept override 268 | { 269 | return mErrorWrapper != nullptr && mErrorWrapper->hasError(); 270 | } 271 | 272 | nvinfer1::IErrorRecorder* getErrorRecorder() const noexcept override 273 | { 274 | return mErrorWrapper ? 
mErrorWrapper->getErrorRecorder() : nullptr; 275 | } 276 | private: 277 | std::string generateUniqueName(std::set& namesSet, const std::string& basename) 278 | { 279 | std::string candidate = basename; 280 | 281 | while (namesSet.find(candidate) != namesSet.end()) 282 | { 283 | candidate = basename + "_" + std::to_string(mSuffixCounter); 284 | ++mSuffixCounter; 285 | } 286 | 287 | namesSet.insert(candidate); 288 | 289 | return candidate; 290 | } 291 | }; 292 | 293 | } // namespace onnx2trt 294 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "LoopHelpers.hpp" 6 | #include "onnx2trt_utils.hpp" 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial) 12 | { 13 | nvinfer1::ITensor* initialTensor = addConstantScalar(ctx, initial, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 14 | nvinfer1::ITensor* one = addConstantScalar(ctx, 1, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 15 | 16 | auto counter = loop->addRecurrence(*initialTensor); 17 | nvinfer1::ITensor* addOne = ctx->network()->addElementWise(*counter->getOutput(0), *one, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0); 18 | counter->setInput(1, *addOne); 19 | return counter->getOutput(0); 20 | } 21 | 22 | } // namespace onnx2trt 23 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "ImporterContext.hpp" 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial = 0); 15 | 16 | } // namespace onnx2trt 17 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ModelImporter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "ImporterContext.hpp" 8 | #include "NvInferPlugin.h" 9 | #include "NvOnnxParser.h" 10 | #include "builtin_op_importers.hpp" 11 | #include "utils.hpp" 12 | 13 | namespace onnx2trt 14 | { 15 | 16 | Status parseGraph(IImporterContext* ctx, const ::onnx::GraphProto& graph, bool deserializingINetwork = false, int* currentNode = nullptr); 17 | 18 | class ModelImporter : public nvonnxparser::IParser 19 | { 20 | protected: 21 | string_map _op_importers; 22 | virtual Status importModel(::onnx::ModelProto const& model); 23 | 24 | private: 25 | ImporterContext _importer_ctx; 26 | std::list<::onnx::ModelProto> _onnx_models; // Needed for ownership of weights 27 | int _current_node; 28 | std::vector _errors; 29 | std::vector _input_dims; 30 | 31 | public: 32 | ModelImporter(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger, const std::vector& input_dims) 33 | : _op_importers(getBuiltinOpImporterMap()) 34 | , _importer_ctx(network, logger) 35 | , _input_dims(input_dims) 36 | { 37 | } 38 | bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) override; 39 | bool parse(void const* 
serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) override; 40 | bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, 41 | SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) override; 42 | 43 | bool supportsOperator(const char* op_name) const override; 44 | void destroy() override 45 | { 46 | delete this; 47 | } 48 | // virtual void registerOpImporter(std::string op, 49 | // NodeImporter const &node_importer) override { 50 | // // Note: This allows existing importers to be replaced 51 | // _op_importers[op] = node_importer; 52 | //} 53 | // virtual Status const &setInput(const char *name, 54 | // nvinfer1::ITensor *input) override; 55 | // virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override; 56 | int getNbErrors() const override 57 | { 58 | return _errors.size(); 59 | } 60 | nvonnxparser::IParserError const* getError(int index) const override 61 | { 62 | assert(0 <= index && index < (int) _errors.size()); 63 | return &_errors[index]; 64 | } 65 | void clearErrors() override 66 | { 67 | _errors.clear(); 68 | } 69 | 70 | //...LG: Move the implementation to .cpp 71 | bool parseFromFile(const char* onnxModelFile, int verbosity) override; 72 | bool parseFromData(const void* onnx_data, size_t size, int verbosity) override; 73 | }; 74 | 75 | } // namespace onnx2trt 76 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/NvOnnxParser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "NvOnnxParser.h" 6 | #include "ModelImporter.hpp" 7 | 8 | extern "C" void* createNvOnnxParser_INTERNAL(void* network_, void* logger_, int version, const std::vector& input_dims) 9 | { 10 | auto network = static_cast(network_); 11 | auto logger = static_cast(logger_); 12 | return new onnx2trt::ModelImporter(network, logger, input_dims); 13 | } 14 | 15 | extern "C" int getNvOnnxParserVersion() 16 | { 17 | return NV_ONNX_PARSER_VERSION; 18 | } -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/NvOnnxParser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #ifndef NV_ONNX_PARSER_H 6 | #define NV_ONNX_PARSER_H 7 | 8 | #include "NvInfer.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | //! 15 | //! \file NvOnnxParser.h 16 | //! 17 | //! This is the API for the ONNX Parser 18 | //! 19 | 20 | #define NV_ONNX_PARSER_MAJOR 0 21 | #define NV_ONNX_PARSER_MINOR 1 22 | #define NV_ONNX_PARSER_PATCH 0 23 | 24 | static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); 25 | 26 | //! \typedef SubGraph_t 27 | //! 28 | //! \brief The data structure containing the parsing capability of 29 | //! a set of nodes in an ONNX graph. 30 | //! 31 | typedef std::pair, bool> SubGraph_t; 32 | 33 | //! \typedef SubGraphCollection_t 34 | //! 35 | //! \brief The data structure containing all SubGraph_t partitioned 36 | //! out of an ONNX graph. 37 | //! 38 | typedef std::vector SubGraphCollection_t; 39 | 40 | //! 41 | //! \namespace nvonnxparser 42 | //! 43 | //! \brief The TensorRT ONNX parser API namespace 44 | //! 
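//
// Minimal usage sketch (illustrative only; the network and logger are assumed to have
// been created elsewhere with the usual TensorRT builder API):
//
//   auto* parser = nvonnxparser::createParser(*network, logger);
//   if (!parser->parseFromFile("model.onnx", /*verbosity=*/1))
//   {
//       for (int i = 0; i < parser->getNbErrors(); ++i)
//           printf("%s\n", parser->getError(i)->desc());
//   }
//   parser->destroy(); // deprecated, but still the cleanup path declared below
//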
45 | namespace nvonnxparser 46 | { 47 | 48 | template 49 | inline int32_t EnumMax(); 50 | 51 | /** \enum ErrorCode 52 | * 53 | * \brief the type of parser error 54 | */ 55 | enum class ErrorCode : int 56 | { 57 | kSUCCESS = 0, 58 | kINTERNAL_ERROR = 1, 59 | kMEM_ALLOC_FAILED = 2, 60 | kMODEL_DESERIALIZE_FAILED = 3, 61 | kINVALID_VALUE = 4, 62 | kINVALID_GRAPH = 5, 63 | kINVALID_NODE = 6, 64 | kUNSUPPORTED_GRAPH = 7, 65 | kUNSUPPORTED_NODE = 8 66 | }; 67 | 68 | template <> 69 | inline int32_t EnumMax() 70 | { 71 | return 9; 72 | } 73 | 74 | /** \class IParserError 75 | * 76 | * \brief an object containing information about an error 77 | */ 78 | class IParserError 79 | { 80 | public: 81 | /** \brief the error code 82 | */ 83 | virtual ErrorCode code() const = 0; 84 | /** \brief description of the error 85 | */ 86 | virtual const char* desc() const = 0; 87 | /** \brief source file in which the error occurred 88 | */ 89 | virtual const char* file() const = 0; 90 | /** \brief source line at which the error occurred 91 | */ 92 | virtual int line() const = 0; 93 | /** \brief source function in which the error occurred 94 | */ 95 | virtual const char* func() const = 0; 96 | /** \brief index of the ONNX model node in which the error occurred 97 | */ 98 | virtual int node() const = 0; 99 | 100 | protected: 101 | virtual ~IParserError() {} 102 | }; 103 | 104 | /** \class IParser 105 | * 106 | * \brief an object for parsing ONNX models into a TensorRT network definition 107 | */ 108 | class IParser 109 | { 110 | public: 111 | /** \brief Parse a serialized ONNX model into the TensorRT network. 112 | * This method has very limited diagnostics. If parsing the serialized model 113 | * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) 114 | * it the user responsibility to intercept and report the error. 115 | * To obtain a better diagnostic, use the parseFromFile method below. 116 | * 117 | * \param serialized_onnx_model Pointer to the serialized ONNX model 118 | * \param serialized_onnx_model_size Size of the serialized ONNX model 119 | * in bytes 120 | * \param model_path Absolute path to the model file for loading external weights if required 121 | * \return true if the model was parsed successfully 122 | * \see getNbErrors() getError() 123 | */ 124 | virtual bool parse(void const* serialized_onnx_model, 125 | size_t serialized_onnx_model_size, 126 | const char* model_path = nullptr) 127 | = 0; 128 | 129 | /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model 130 | * calls parse method inside. 
131 | * 132 | * \param File name 133 | * \param Verbosity Level 134 | * 135 | * \return true if the model was parsed successfully 136 | * 137 | */ 138 | virtual bool parseFromFile(const char* onnxModelFile, int verbosity) = 0; 139 | virtual bool parseFromData(const void* onnx_data, size_t size, int verbosity) = 0; 140 | 141 | /** \brief Check whether TensorRT supports a particular ONNX model 142 | * 143 | * \param serialized_onnx_model Pointer to the serialized ONNX model 144 | * \param serialized_onnx_model_size Size of the serialized ONNX model 145 | * in bytes 146 | * \param sub_graph_collection Container to hold supported subgraphs 147 | * \param model_path Absolute path to the model file for loading external weights if required 148 | * \return true if the model is supported 149 | */ 150 | virtual bool supportsModel(void const* serialized_onnx_model, 151 | size_t serialized_onnx_model_size, 152 | SubGraphCollection_t& sub_graph_collection, 153 | const char* model_path = nullptr) 154 | = 0; 155 | 156 | /** \brief Parse a serialized ONNX model into the TensorRT network 157 | * with consideration of user provided weights 158 | * 159 | * \param serialized_onnx_model Pointer to the serialized ONNX model 160 | * \param serialized_onnx_model_size Size of the serialized ONNX model 161 | * in bytes 162 | * \return true if the model was parsed successfully 163 | * \see getNbErrors() getError() 164 | */ 165 | virtual bool parseWithWeightDescriptors( 166 | void const* serialized_onnx_model, size_t serialized_onnx_model_size) 167 | = 0; 168 | 169 | /** \brief Returns whether the specified operator may be supported by the 170 | * parser. 171 | * 172 | * Note that a result of true does not guarantee that the operator will be 173 | * supported in all cases (i.e., this function may return false-positives). 
174 | * 175 | * \param op_name The name of the ONNX operator to check for support 176 | */ 177 | virtual bool supportsOperator(const char* op_name) const = 0; 178 | /** \brief destroy this object 179 | * 180 | * \warning deprecated and planned on being removed in TensorRT 10.0 181 | */ 182 | TRT_DEPRECATED virtual void destroy() = 0; 183 | /** \brief Get the number of errors that occurred during prior calls to 184 | * \p parse 185 | * 186 | * \see getError() clearErrors() IParserError 187 | */ 188 | virtual int getNbErrors() const = 0; 189 | /** \brief Get an error that occurred during prior calls to \p parse 190 | * 191 | * \see getNbErrors() clearErrors() IParserError 192 | */ 193 | virtual IParserError const* getError(int index) const = 0; 194 | /** \brief Clear errors from prior calls to \p parse 195 | * 196 | * \see getNbErrors() getError() IParserError 197 | */ 198 | virtual void clearErrors() = 0; 199 | 200 | virtual ~IParser() noexcept = default; 201 | }; 202 | 203 | } // namespace nvonnxparser 204 | 205 | extern "C" TENSORRTAPI void* createNvOnnxParser_INTERNAL(void* network, void* logger, int version, const std::vector& input_dims); 206 | extern "C" TENSORRTAPI int getNvOnnxParserVersion(); 207 | extern "C" TENSORRTAPI void register_layerhook_reshape(const std::function(const std::string& name, const std::vector& shape)>&); 208 | 209 | namespace nvonnxparser 210 | { 211 | 212 | namespace 213 | { 214 | 215 | /** \brief Create a new parser object 216 | * 217 | * \param network The network definition that the parser will write to 218 | * \param logger The logger to use 219 | * \return a new parser object or NULL if an error occurred 220 | * 221 | * Any input dimensions that are constant should not be changed after parsing, 222 | * because correctness of the translation may rely on those constants. 223 | * Changing a dynamic input dimension, i.e. one that translates to -1 in 224 | * TensorRT, to a constant is okay if the constant is consistent with the model. 
225 | * 226 | * \see IParser 227 | */ 228 | inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger, const std::vector& input_dims={}) 229 | { 230 | return static_cast(createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION, input_dims)); 231 | } 232 | 233 | } // namespace 234 | 235 | } // namespace nvonnxparser 236 | 237 | #endif // NV_ONNX_PARSER_H 238 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/OnnxAttrs.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "OnnxAttrs.hpp" 6 | #include "ShapedWeights.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | 10 | template <> 11 | float OnnxAttrs::get(const std::string& key) const 12 | { 13 | return this->at(key)->f(); 14 | } 15 | 16 | template <> 17 | int OnnxAttrs::get(const std::string& key) const 18 | { 19 | return this->at(key)->i(); 20 | } 21 | 22 | template <> 23 | bool OnnxAttrs::get(const std::string& key) const 24 | { 25 | int value = this->at(key)->i(); 26 | assert(value == bool(value)); 27 | return bool(value); 28 | } 29 | 30 | template <> 31 | std::string OnnxAttrs::get(const std::string& key) const 32 | { 33 | return this->at(key)->s(); 34 | } 35 | 36 | template <> 37 | std::vector OnnxAttrs::get>(const std::string& key) const 38 | { 39 | auto attr = this->at(key)->ints(); 40 | return std::vector(attr.begin(), attr.end()); 41 | } 42 | 43 | template <> 44 | std::vector OnnxAttrs::get>(const std::string& key) const 45 | { 46 | auto attr = this->at(key)->ints(); 47 | return std::vector(attr.begin(), attr.end()); 48 | } 49 | 50 | template <> 51 | std::vector OnnxAttrs::get>(const std::string& key) const 52 | { 53 | auto attr = this->at(key)->floats(); 54 | return std::vector(attr.begin(), attr.end()); 55 | } 56 | 57 | template <> 58 | nvinfer1::Dims OnnxAttrs::get(const std::string& key) const 59 | { 60 | auto values = this->get>(key); 61 | nvinfer1::Dims dims; 62 | dims.nbDims = values.size(); 63 | std::copy(values.begin(), values.end(), dims.d); 64 | // Note: No dimension type information is included 65 | return dims; 66 | } 67 | 68 | template <> 69 | nvinfer1::DimsHW OnnxAttrs::get(const std::string& key) const 70 | { 71 | nvinfer1::Dims dims = this->get(key); 72 | assert(dims.nbDims == 2); 73 | return nvinfer1::DimsHW(dims.d[0], dims.d[1]); 74 | } 75 | 76 | template <> 77 | nvinfer1::Permutation OnnxAttrs::get(const std::string& key) const 78 | { 79 | auto values = this->get>(key); 80 | nvinfer1::Permutation perm; 81 | std::copy(values.begin(), values.end(), perm.order); 82 | // Fill unused values with identity permutation 83 | for (int i = values.size(); i < nvinfer1::Dims::MAX_DIMS; ++i) 84 | { 85 | perm.order[i] = i; 86 | } 87 | return perm; 88 | } 89 | 90 | template <> 91 | onnx2trt::ShapedWeights OnnxAttrs::get(const std::string& key) const 92 | { 93 | ::onnx::TensorProto const& onnx_weights_tensor = this->at(key)->t(); 94 | onnx2trt::ShapedWeights weights; 95 | bool success = convertOnnxWeights(onnx_weights_tensor, &weights, mCtx); 96 | if (!success) 97 | { 98 | throw std::runtime_error{"Unable to convert ONNX weights"}; 99 | } 100 | return weights; 101 | } 102 | 103 | template <> 104 | nvinfer1::DataType OnnxAttrs::get(const std::string& key) const 105 | { 106 | ::onnx::TensorProto::DataType onnx_dtype 107 | = static_cast<::onnx::TensorProto::DataType>(this->at(key)->i()); 108 | nvinfer1::DataType 
dtype{}; 109 | if (!onnx2trt::convertDtype(onnx_dtype, &dtype)) 110 | { 111 | dtype = static_cast(-1); 112 | } 113 | return dtype; 114 | } 115 | 116 | template <> 117 | std::vector OnnxAttrs::get>(const std::string& key) const 118 | { 119 | auto attr = this->at(key)->ints(); 120 | auto onnx_dtypes = std::vector(attr.begin(), attr.end()); 121 | std::vector dtypes{}; 122 | for (auto onnx_dtype : onnx_dtypes) 123 | { 124 | nvinfer1::DataType dtype{}; 125 | if (!onnx2trt::convertDtype(static_cast(onnx_dtype), &dtype)) 126 | { 127 | dtype = static_cast(-1); 128 | } 129 | dtypes.push_back(dtype); 130 | } 131 | return dtypes; 132 | } 133 | 134 | inline nvinfer1::ActivationType activationStringToEnum(const std::string& type) 135 | { 136 | if (type == "Relu") 137 | { 138 | return nvinfer1::ActivationType::kRELU; 139 | } 140 | if (type == "Tanh") 141 | { 142 | return nvinfer1::ActivationType::kTANH; 143 | } 144 | if (type == "Sigmoid") 145 | { 146 | return nvinfer1::ActivationType::kSIGMOID; 147 | } 148 | if (type == "LeakyRelu") 149 | { 150 | return nvinfer1::ActivationType::kLEAKY_RELU; 151 | } 152 | if (type == "ThresholdedRelu") 153 | { 154 | return nvinfer1::ActivationType::kTHRESHOLDED_RELU; 155 | } 156 | if (type == "ScaledTanh") 157 | { 158 | return nvinfer1::ActivationType::kSCALED_TANH; 159 | } 160 | if (type == "HardSigmoid") 161 | { 162 | return nvinfer1::ActivationType::kHARD_SIGMOID; 163 | } 164 | if (type == "Elu") 165 | { 166 | return nvinfer1::ActivationType::kELU; 167 | } 168 | if (type == "Softsign") 169 | { 170 | return nvinfer1::ActivationType::kSOFTSIGN; 171 | } 172 | if (type == "Softplus") 173 | { 174 | return nvinfer1::ActivationType::kSOFTPLUS; 175 | } 176 | throw std::runtime_error("Unknown activation type: " + type); 177 | } 178 | 179 | template <> 180 | nvinfer1::ActivationType OnnxAttrs::get(const std::string& key) const 181 | { 182 | const std::string type = this->get(key); 183 | return activationStringToEnum(type); 184 | } 185 | 186 | template <> 187 | std::vector OnnxAttrs::get>( 188 | const std::string& key) const 189 | { 190 | const auto strings = this->at(key)->strings(); 191 | std::vector actTypes; 192 | for (const auto& str : strings) 193 | { 194 | actTypes.emplace_back(activationStringToEnum(str)); 195 | } 196 | return actTypes; 197 | } 198 | 199 | template <> 200 | const ::onnx::GraphProto& OnnxAttrs::get(const std::string& key) const 201 | { 202 | return this->at(key)->g(); 203 | } 204 | 205 | template <> 206 | nvinfer1::RNNOperation OnnxAttrs::get(const std::string& key) const 207 | { 208 | std::string op = this->get(key); 209 | if (op == std::string("relu")) 210 | { 211 | return nvinfer1::RNNOperation::kRELU; 212 | } 213 | if (op == std::string("tanh")) 214 | { 215 | return nvinfer1::RNNOperation::kTANH; 216 | } 217 | if (op == std::string("lstm")) 218 | { 219 | return nvinfer1::RNNOperation::kLSTM; 220 | } 221 | if (op == std::string("gru")) 222 | { 223 | return nvinfer1::RNNOperation::kGRU; 224 | } 225 | throw std::runtime_error("Unknown RNNOperation: " + op); 226 | } 227 | 228 | template <> 229 | nvinfer1::RNNInputMode OnnxAttrs::get(const std::string& key) const 230 | { 231 | std::string mode = this->get(key); 232 | if (mode == std::string("skip")) 233 | { 234 | return nvinfer1::RNNInputMode::kSKIP; 235 | } 236 | if (mode == std::string("linear")) 237 | { 238 | return nvinfer1::RNNInputMode::kLINEAR; 239 | } 240 | throw std::runtime_error("Unknown RNNInputMode: " + mode); 241 | } 242 | 243 | template <> 244 | nvinfer1::RNNDirection OnnxAttrs::get(const 
std::string& key) const 245 | { 246 | std::string direction = this->get(key); 247 | if (direction == std::string("unidirection")) 248 | { 249 | return nvinfer1::RNNDirection::kUNIDIRECTION; 250 | } 251 | if (direction == std::string("bidirection")) 252 | { 253 | return nvinfer1::RNNDirection::kBIDIRECTION; 254 | } 255 | throw std::runtime_error("Unknown RNNDirection: " + direction); 256 | } 257 | 258 | template <> 259 | std::vector OnnxAttrs::get>(const std::string& key) const 260 | { 261 | auto attr = this->at(key)->strings(); 262 | return std::vector(attr.begin(), attr.end()); 263 | } 264 | 265 | template <> 266 | nvinfer1::ScaleMode OnnxAttrs::get(const std::string& key) const 267 | { 268 | std::string s = this->get(key); 269 | if (s == "uniform") 270 | { 271 | return nvinfer1::ScaleMode::kUNIFORM; 272 | } 273 | if (s == "channel") 274 | { 275 | return nvinfer1::ScaleMode::kCHANNEL; 276 | } 277 | if (s == "elementwise") 278 | { 279 | return nvinfer1::ScaleMode::kELEMENTWISE; 280 | } 281 | throw std::runtime_error("Unknown ScaleMode: " + s); 282 | } 283 | 284 | template <> 285 | nvinfer1::MatrixOperation OnnxAttrs::get(const std::string& key) const 286 | { 287 | std::string s = this->get(key); 288 | if (s == "none") 289 | { 290 | return nvinfer1::MatrixOperation::kNONE; 291 | } 292 | if (s == "transpose") 293 | { 294 | return nvinfer1::MatrixOperation::kTRANSPOSE; 295 | } 296 | if (s == "vector") 297 | { 298 | return nvinfer1::MatrixOperation::kVECTOR; 299 | } 300 | throw std::runtime_error("Unknown MatrixOperation: " + s); 301 | } 302 | 303 | template <> 304 | nvinfer1::ResizeMode OnnxAttrs::get(const std::string& key) const 305 | { 306 | const auto& mode = this->get(key); 307 | if (mode == "nearest") 308 | { 309 | return nvinfer1::ResizeMode::kNEAREST; 310 | } 311 | if (mode == "linear") 312 | { 313 | return nvinfer1::ResizeMode::kLINEAR; 314 | } 315 | throw std::runtime_error("Unknown ResizeMode: " + mode); 316 | } 317 | 318 | template <> 319 | nvinfer1::ResizeCoordinateTransformation OnnxAttrs::get( 320 | const std::string& key) const 321 | { 322 | const auto& transformation = this->get(key); 323 | if (transformation == "align_corners") 324 | { 325 | return nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS; 326 | } 327 | if (transformation == "asymmetric") 328 | { 329 | return nvinfer1::ResizeCoordinateTransformation::kASYMMETRIC; 330 | } 331 | if (transformation == "half_pixel") 332 | { 333 | return nvinfer1::ResizeCoordinateTransformation::kHALF_PIXEL; 334 | } 335 | throw std::runtime_error("Unknown ResizeCoordinateTransformation: " + transformation); 336 | } 337 | 338 | template <> 339 | nvinfer1::ResizeSelector OnnxAttrs::get(const std::string& key) const 340 | { 341 | const auto& selector = this->get(key); 342 | if (selector == "formula") 343 | { 344 | return nvinfer1::ResizeSelector::kFORMULA; 345 | } 346 | if (selector == "upper") 347 | { 348 | return nvinfer1::ResizeSelector::kUPPER; 349 | } 350 | throw std::runtime_error("Unknown ResizeSelector: " + selector); 351 | } 352 | 353 | template <> 354 | nvinfer1::ResizeRoundMode OnnxAttrs::get(const std::string& key) const 355 | { 356 | const auto& roundMode = this->get(key); 357 | if (roundMode == "half_up") 358 | { 359 | return nvinfer1::ResizeRoundMode::kHALF_UP; 360 | } 361 | if (roundMode == "half_down") 362 | { 363 | return nvinfer1::ResizeRoundMode::kHALF_DOWN; 364 | } 365 | if (roundMode == "floor") 366 | { 367 | return nvinfer1::ResizeRoundMode::kFLOOR; 368 | } 369 | if (roundMode == "ceil") 370 | { 371 | return 
nvinfer1::ResizeRoundMode::kCEIL; 372 | } 373 | throw std::runtime_error("Unknown ResizeRoundMode: " + roundMode); 374 | } 375 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/OnnxAttrs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ImporterContext.hpp" 13 | 14 | class OnnxAttrs 15 | { 16 | template 17 | using string_map = std::unordered_map; 18 | typedef string_map<::onnx::AttributeProto const*> AttrMap; 19 | AttrMap _attrs; 20 | onnx2trt::IImporterContext* mCtx; 21 | 22 | public: 23 | explicit OnnxAttrs(::onnx::NodeProto const& onnx_node, onnx2trt::IImporterContext* ctx) 24 | : mCtx{ctx} 25 | { 26 | for (auto const& attr : onnx_node.attribute()) 27 | { 28 | _attrs.insert({attr.name(), &attr}); 29 | } 30 | } 31 | 32 | bool count(const std::string& key) const 33 | { 34 | return _attrs.count(key); 35 | } 36 | 37 | ::onnx::AttributeProto const* at(std::string key) const 38 | { 39 | if (!_attrs.count(key)) 40 | { 41 | throw std::out_of_range("Attribute not found: " + key); 42 | } 43 | return _attrs.at(key); 44 | } 45 | 46 | ::onnx::AttributeProto::AttributeType type(const std::string& key) const 47 | { 48 | return this->at(key)->type(); 49 | } 50 | 51 | 52 | template 53 | T get(const std::string& key) const; 54 | 55 | template 56 | T get(const std::string& key, T const& default_value) const 57 | { 58 | return _attrs.count(key) ? this->get(key) : default_value; 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/RNNHelpers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "RNNHelpers.hpp" 6 | #include "LoopHelpers.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | nvinfer1::ITensor* addRNNInput(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, std::vector& inputs, const std::string& direction) 14 | { 15 | // In the forward/reverse cases, we only use a single iterator. In the bidirectional case, a forward and reverse 16 | // iterator must be concatenated. 
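    // `direction` is matched against "forward", "reverse" and "bidirectional";
    // any other value falls through all branches and the function returns nullptr.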
17 | // Input dimensions: [1, B, E] 18 | nvinfer1::ITensor* iterationInput{nullptr}; 19 | nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); 20 | 21 | const int sequenceLenIndex = 4; 22 | bool isRagged = inputs.size() > sequenceLenIndex && inputs.at(sequenceLenIndex); 23 | 24 | if (direction == "forward") 25 | { 26 | iterationInput = unsqueezeTensor(ctx, node, *loop->addIterator(*input)->getOutput(0), std::vector{0}); 27 | 28 | if (isRagged) 29 | { 30 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 31 | auto maxLen = getAxisLength(ctx, input, 0); 32 | iterationInput = clearMissingSequenceElements(ctx, node, loop, seqLens, iterationInput, maxLen); 33 | } 34 | } 35 | else if (direction == "reverse") 36 | { 37 | nvinfer1::IIteratorLayer* reverseIterator = loop->addIterator(*input); 38 | reverseIterator->setReverse(true); 39 | iterationInput = unsqueezeTensor(ctx, node, *reverseIterator->getOutput(0), std::vector{0}); 40 | if (isRagged) 41 | { 42 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 43 | auto maxLen = getAxisLength(ctx, input, 0); 44 | iterationInput = clearMissingSequenceElements(ctx, node, loop, seqLens, iterationInput, maxLen, true); 45 | } 46 | } 47 | else if (direction == "bidirectional") 48 | { 49 | nvinfer1::IIteratorLayer* forward = loop->addIterator(*input); 50 | nvinfer1::IIteratorLayer* reverse = loop->addIterator(*input); 51 | reverse->setReverse(true); 52 | 53 | auto forwardInput = unsqueezeTensor(ctx, node, *forward->getOutput(0), std::vector{0}); 54 | auto reverseInput = unsqueezeTensor(ctx, node, *reverse->getOutput(0), std::vector{0}); 55 | if (isRagged) 56 | { 57 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 58 | auto counter = addLoopCounter(ctx, loop); 59 | auto maxLen = getAxisLength(ctx, input, 0); 60 | forwardInput = clearMissingSequenceElements(ctx, node, loop, seqLens, forwardInput, maxLen, false, counter); 61 | reverseInput = clearMissingSequenceElements(ctx, node, loop, seqLens, reverseInput, maxLen, true, counter); 62 | } 63 | 64 | // Stack on the 0th axis to create a (numDirections, B, E) tensor. 
65 | std::array tensors{{forwardInput, reverseInput}}; 66 | nvinfer1::IConcatenationLayer* concat = ctx->network()->addConcatenation(tensors.data(), 2); 67 | concat->setAxis(0); 68 | iterationInput = concat->getOutput(0); 69 | } 70 | if (iterationInput) 71 | { 72 | LOG_VERBOSE("Input shape: " << iterationInput->getDimensions()); 73 | } 74 | return iterationInput; 75 | } 76 | 77 | nvinfer1::ITensor* clearMissingSequenceElements(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, 78 | nvinfer1::ITensor* seqLens, nvinfer1::ITensor* toMask, nvinfer1::ITensor* maxLen, bool reverse, 79 | nvinfer1::ITensor* counter) 80 | { 81 | nvinfer1::ITensor* zero 82 | = addConstantScalar(ctx, 0.f, ::onnx::TensorProto::FLOAT, nvinfer1::Dims3(1, 1, 1))->getOutput(0); 83 | nvinfer1::ITensor* seqMask = getRaggedMask(ctx, node, loop, seqLens, maxLen, reverse, counter); 84 | return ctx->network()->addSelect(*seqMask, *toMask, *zero)->getOutput(0); 85 | } 86 | 87 | nvinfer1::ITensor* maskRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 88 | nvinfer1::ITensor* prevH, nvinfer1::ITensor* Ht, nvinfer1::ITensor* maxLen, bool reverse, 89 | nvinfer1::ITensor* counter) 90 | { 91 | // maxLen must be provided if reverse is true 92 | // Forwards previous hidden state if invalid 93 | nvinfer1::ITensor* valid = getRaggedMask(ctx, node, loop, seqLens, maxLen, reverse, counter); 94 | return ctx->network()->addSelect(*valid, *Ht, *prevH)->getOutput(0); 95 | } 96 | 97 | nvinfer1::ITensor* maskBidirRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 98 | nvinfer1::ITensor* maxLen, nvinfer1::ITensor* Ht1, nvinfer1::ITensor* Ht, nvinfer1::ITensor* singlePassShape) 99 | { 100 | // Splits hidden state into forward and backward states, masks each accordingly, then concatenates 101 | 102 | nvinfer1::ITensor* forwardStart 103 | = addConstant(ctx, std::vector{0, 0, 0}, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 3}) 104 | ->getOutput(0); 105 | nvinfer1::ITensor* reverseStart 106 | = addConstant(ctx, std::vector{1, 0, 0}, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 3}) 107 | ->getOutput(0); 108 | 109 | nvinfer1::ISliceLayer* HtForwardLayer 110 | = ctx->network()->addSlice(*Ht, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 111 | HtForwardLayer->setInput(1, *forwardStart); 112 | HtForwardLayer->setInput(2, *singlePassShape); 113 | 114 | nvinfer1::ISliceLayer* HtBackwardLayer 115 | = ctx->network()->addSlice(*Ht, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 116 | HtBackwardLayer->setInput(1, *reverseStart); 117 | HtBackwardLayer->setInput(2, *singlePassShape); 118 | 119 | nvinfer1::ISliceLayer* Ht1ForwardLayer 120 | = ctx->network()->addSlice(*Ht1, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 121 | Ht1ForwardLayer->setInput(1, *forwardStart); 122 | Ht1ForwardLayer->setInput(2, *singlePassShape); 123 | 124 | nvinfer1::ISliceLayer* Ht1BackwardLayer 125 | = ctx->network()->addSlice(*Ht1, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 126 | Ht1BackwardLayer->setInput(1, *reverseStart); 127 | Ht1BackwardLayer->setInput(2, *singlePassShape); 128 | 129 | auto forwardHt = HtForwardLayer->getOutput(0); 130 | auto backwardHt = HtBackwardLayer->getOutput(0); 131 | auto forwardHt1 = Ht1ForwardLayer->getOutput(0); 132 | auto backwardHt1 = Ht1BackwardLayer->getOutput(0); 
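    // Mask each direction against its own validity window (the reverse pass counts down
    // from maxLen), then restack the two slices into a (numDirections=2, B, H) hidden state.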
133 | 134 | auto counter = addLoopCounter(ctx, loop, 0); 135 | forwardHt = maskRNNHidden(ctx, node, loop, seqLens, forwardHt1, forwardHt, maxLen, false, counter); 136 | backwardHt = maskRNNHidden(ctx, node, loop, seqLens, backwardHt1, backwardHt, maxLen, true, counter); 137 | std::array tensors{{forwardHt, backwardHt}}; 138 | nvinfer1::IConcatenationLayer* concat = ctx->network()->addConcatenation(tensors.data(), 2); 139 | concat->setAxis(0); 140 | return concat->getOutput(0); 141 | } 142 | 143 | nvinfer1::ITensor* getRaggedMask(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 144 | nvinfer1::ITensor* maxLen, bool reverse, nvinfer1::ITensor* counter) 145 | { 146 | // Returns a bool tensor which is true where the elements are valid (within the sequence) and false when outside the 147 | // sequence. 148 | // maxLen must be provided if reverse is true 149 | assert(!reverse || maxLen); 150 | 151 | if (!counter) 152 | { 153 | counter = addLoopCounter(ctx, loop, 0); 154 | } 155 | 156 | // Create Mask 157 | nvinfer1::ITensor* seqMask; 158 | if (reverse) 159 | { 160 | counter 161 | = ctx->network() 162 | ->addElementWise(*unsqueezeTensor(ctx, node, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB) 163 | ->getOutput(0); 164 | seqMask 165 | = ctx->network()->addElementWise(*seqLens, *counter, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); 166 | seqMask = ctx->network()->addUnary(*seqMask, nvinfer1::UnaryOperation::kNOT)->getOutput(0); 167 | } 168 | else 169 | { 170 | seqMask 171 | = ctx->network()->addElementWise(*counter, *seqLens, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); 172 | } 173 | return unsqueezeTensor(ctx, node, *seqMask, std::vector{0, 2}); 174 | } 175 | 176 | } // namespace onnx2trt 177 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/RNNHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "TensorOrWeights.hpp" 12 | #include "ImporterContext.hpp" 13 | 14 | namespace onnx2trt 15 | { 16 | 17 | nvinfer1::ITensor* addRNNInput(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, std::vector& inputs, const std::string& direction); 18 | 19 | // Zeros out invalid timesteps in toMask. 
maxLen must be provided if reverse is true 20 | nvinfer1::ITensor* clearMissingSequenceElements(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* toMask, nvinfer1::ITensor* maxLen, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 21 | 22 | // Returns a bool tensor which is true during valid timesteps 23 | nvinfer1::ITensor* getRaggedMask(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 24 | 25 | // Selects between prevH and Ht to forward previous hidden state through invalid timesteps 26 | nvinfer1::ITensor* maskRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* prevH, nvinfer1::ITensor* Ht, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 27 | 28 | // Splits a bidirectional hidden state into forward and reverse passes, masks each using maskRNNHidden, then concatenates 29 | nvinfer1::ITensor* maskBidirRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen, nvinfer1::ITensor* Ht1, nvinfer1::ITensor* Ht, nvinfer1::ITensor* singlePassShape); 30 | 31 | } // namespace onnx2trt 32 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapeTensor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "ShapeTensor.hpp" 6 | #include "TensorOrWeights.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | #include 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | //! If true, tolerate bug where scalar constant of type FLOAT is missing its value, 15 | //! and a shape tensor is expected. 16 | static const bool gTolerateTRT_12408 = true; 17 | 18 | ShapeTensor::ShapeTensor(int rank_, std::vector&& values_) 19 | : mDepth(0) 20 | , mAllValuesKnown(true) 21 | , mRank(rank_) 22 | , mSize(values_.size()) 23 | , mValues(std::move(values_)) 24 | { 25 | assert((rank_ == 0 || rank_ == 1) && "shape tensor must have rank 0 or 1"); 26 | assert(rank_ > 0 || mValues.size() == 1); 27 | } 28 | 29 | ShapeTensor::ShapeTensor(IImporterContext* ctx, TensorOrWeights& t) 30 | : mDepth(0) 31 | { 32 | if (t.is_tensor()) 33 | { 34 | *this = ShapeTensor(t.tensor()); 35 | } 36 | else 37 | { 38 | const nvinfer1::Dims d = t.shape(); 39 | const auto& weights = t.weights(); 40 | if (gTolerateTRT_12408 && weights.type == ::onnx::TensorProto::FLOAT && d.nbDims == 0 && weights.count() == 0) 41 | { 42 | LOG_WARNING("Scalar constant of type FLOAT with no value encountered where ONNX specification requires tensor describing a shape. Assuming it's an INT64 empty vector."); 43 | mRank = 1; 44 | mSize = 0; 45 | mAllValuesKnown = true; 46 | return; 47 | } 48 | assert(0 <= d.nbDims); 49 | assert(d.nbDims <= 1 && "shape tensor must be 0D or 1D"); 50 | mRank = d.nbDims; 51 | mSize = d.nbDims == 0 ? 
1 : d.d[0]; 52 | auto status = weightsToVector(weights, &mValues); 53 | if (status.code() != ErrorCode::kSUCCESS) 54 | { 55 | throw std::runtime_error("constant " + t.getName() + " is not a valid shape tensor"); 56 | } 57 | mAllValuesKnown = true; 58 | } 59 | } 60 | 61 | static bool hasAllNonNegativeValues(const std::vector& values) 62 | { 63 | return std::all_of(values.begin(), values.end(), [](int x) { return x >= 0; }); 64 | } 65 | 66 | ShapeTensor::ShapeTensor(nvinfer1::ITensor& t, int depth) 67 | : mDepth(depth) 68 | , mRank(1) 69 | , mTensor(&t) 70 | { 71 | const nvinfer1::Dims dims = t.getDimensions(); 72 | 73 | switch (mDepth) 74 | { 75 | case 0: 76 | assert(t.getType() == nvinfer1::DataType::kINT32); 77 | mRank = dims.nbDims; 78 | if (mRank == 0) 79 | { 80 | mSize = 1; 81 | } 82 | else if (mRank == 1) 83 | { 84 | mSize = dims.d[0]; 85 | } 86 | else 87 | { 88 | assert(mRank == -1); 89 | } 90 | break; 91 | 92 | case 1: 93 | if (dims.nbDims >= 0) 94 | { 95 | mSize = dims.nbDims; 96 | mValues.resize(dims.nbDims); 97 | std::copy_n(dims.d, dims.nbDims, mValues.begin()); 98 | mAllValuesKnown = hasAllNonNegativeValues(mValues); 99 | } 100 | break; 101 | 102 | case 2: 103 | mSize = 1; 104 | if (dims.nbDims >= 0) 105 | { 106 | mValues = {dims.nbDims}; 107 | mAllValuesKnown = hasAllNonNegativeValues(mValues); 108 | } 109 | break; 110 | 111 | case 3: 112 | // Applying IShapeLayer three times always yields a 1D vector containing 1. 113 | mDepth = 0; 114 | mSize = 1; 115 | mValues = {1}; 116 | mAllValuesKnown = true; 117 | mTensor = nullptr; 118 | break; 119 | 120 | default: 121 | // Though depths greater than 3 could be handled the same as 3, they are 122 | // likely a sign of a problem. Depths less than 0 make no sense. 123 | assert(0); 124 | break; 125 | } 126 | } 127 | 128 | ShapeTensor shapeVector(int64_t value) 129 | { 130 | return ShapeTensor(1, std::vector({value})); 131 | } 132 | 133 | ShapeTensor shapeScalar(int64_t value) 134 | { 135 | return ShapeTensor(0, std::vector({value})); 136 | } 137 | 138 | bool ShapeTensor::valueKnown(int k) const 139 | { 140 | assert(0 <= k); 141 | assert(k < mSize); 142 | return allValuesKnown() || (mValues.size() == static_cast(mSize) && mValues[k] >= 0); 143 | } 144 | 145 | bool ShapeTensor::isAll(int64_t x) const 146 | { 147 | assert(mDepth >= 0 && "undefined tensor"); 148 | return allValuesKnown() && std::all_of(begin(), end(), [x](int64_t y) { return x == y; }); 149 | } 150 | 151 | nvinfer1::ITensor& ShapeTensor::tensor(IImporterContext* ctx) const 152 | { 153 | assert(mDepth >= 0 && "undefined tensor"); 154 | assert(mDepth <= 2); 155 | if (!mTensor || mDepth != 0) 156 | { 157 | // Need to create an ITensor representing *this. 
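        // Two paths: when every value is known, materialize an INT32 constant layer;
        // otherwise apply IShapeLayer to the stored tensor mDepth times.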
158 | if (allValuesKnown()) 159 | { 160 | // Create constant 161 | const nvinfer1::Dims dims{rank(), {size()}}; 162 | const nvinfer1::Weights w{nvinfer1::DataType::kINT32, convertINT64(mValues.data(), dims, ctx), size()}; 163 | mTensor = ctx->network()->addConstant(dims, w)->getOutput(0); 164 | mDepth = 0; 165 | } 166 | else 167 | { 168 | assert(mTensor); 169 | for (; mDepth > 0; --mDepth) 170 | { 171 | mTensor = ctx->network()->addShape(*mTensor)->getOutput(0); 172 | } 173 | } 174 | } 175 | return *mTensor; 176 | } 177 | 178 | ShapeTensor iotaShapeVector(int32_t n) 179 | { 180 | std::vector values(n); 181 | std::iota(values.begin(), values.end(), 0); 182 | return ShapeTensor(1, std::move(values)); 183 | } 184 | 185 | ShapeTensor similar(IImporterContext* ctx, const ShapeTensor& exemplar, int64_t value) 186 | { 187 | return fillShapeVector(ctx, value, shapeOf(exemplar)); 188 | } 189 | 190 | ShapeTensor fillShapeVector(IImporterContext* ctx, int64_t value, const ShapeTensor& count) 191 | { 192 | assert(count.rank() == 1 && "implementation assumes 1D size"); 193 | assert(count.size() == 1 && "implementation assumes 1D size of known size"); 194 | if (count.allValuesKnown()) 195 | { 196 | return ShapeTensor(1, std::vector(count[0], value)); 197 | } 198 | else 199 | { 200 | nvinfer1::ISliceLayer* slice 201 | = addSlice(ctx, shapeVector(value).tensor(ctx), shapeVector(0), count, shapeVector(0)); 202 | return ShapeTensor(*slice->getOutput(0)); 203 | } 204 | } 205 | 206 | using nvinfer1::ElementWiseOperation; 207 | 208 | //! Helper that implements an elementwise operations on two shape tensors x and y. 209 | //! f must implement the operation on a pair of int64_t. 210 | //! commutes should be true f is commutative. 211 | //! rightIdentity should be the right identity value for f. 212 | static ShapeTensor op(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y, ElementWiseOperation operation, 213 | bool commutative, int64_t rightIdentity, const std::function&& f) 214 | { 215 | assert(!x.rankKnown() || !y.rankKnown() || x.rank() == y.rank()); 216 | if (x.sizeKnown() && y.sizeKnown()) 217 | { 218 | assert(x.size() == 1 || y.size() == 1 || x.size() == y.size()); 219 | if (y.isAll(rightIdentity) && y.size() <= x.size()) 220 | { 221 | return x; 222 | } 223 | if (commutative && x.isAll(rightIdentity) && x.size() <= y.size()) 224 | { 225 | return y; 226 | } 227 | } 228 | if (x.allValuesKnown() && y.allValuesKnown()) 229 | { 230 | std::vector values(std::max(x.size(), y.size())); 231 | for (size_t i = 0; i < values.size(); ++i) 232 | { 233 | // The % simulates broadcast rules. 
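            // e.g. x = {2, 3, 4}, y = {5}: y[i % 1] reuses 5 for every element,
            // mirroring how a length-1 operand broadcasts against a longer one.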
234 | values[i] = f(x[i % x.size()], y[i % y.size()]); 235 | } 236 | return ShapeTensor(x.rank(), std::move(values)); 237 | } 238 | return ShapeTensor(*ctx->network()->addElementWise(x.tensor(ctx), y.tensor(ctx), operation)->getOutput(0), 0); 239 | } 240 | 241 | ShapeTensor add(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 242 | { 243 | return op(ctx, x, y, ElementWiseOperation::kSUM, true, 0, std::plus()); 244 | } 245 | 246 | ShapeTensor sub(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 247 | { 248 | return op(ctx, x, y, ElementWiseOperation::kSUB, false, 0, std::minus()); 249 | } 250 | 251 | ShapeTensor mul(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 252 | { 253 | return op(ctx, x, y, ElementWiseOperation::kPROD, true, 1, std::multiplies()); 254 | } 255 | 256 | ShapeTensor min(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 257 | { 258 | return op(ctx, x, y, ElementWiseOperation::kMIN, true, std::numeric_limits::max(), 259 | [](int64_t x, int64_t y) { return std::min(x, y); }); 260 | } 261 | 262 | ShapeTensor max(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 263 | { 264 | return op(ctx, x, y, ElementWiseOperation::kMAX, true, std::numeric_limits::min(), 265 | [](int64_t x, int64_t y) { return std::max(x, y); }); 266 | } 267 | ShapeTensor floorDiv(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 268 | { 269 | return op(ctx, x, y, ElementWiseOperation::kFLOOR_DIV, false, 1, [](int64_t x, int64_t y) { 270 | assert(y != 0 && "divisor must be non-zero"); 271 | const int64_t d = x / y; 272 | return d * y == x ? d : d - ((x < 0) ^ (y < 0)); 273 | }); 274 | } 275 | 276 | ShapeTensor broadcast(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 277 | { 278 | // max(x,y) works unless x or y is 0. 279 | // min(x,y,1) yields 0 if x or y is 0, and 1 otherwise. 280 | // So compute max(x,y)*min(x,y,1). 
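    // Worked examples: broadcast(3, 1) = 3*1 = 3; broadcast(0, 4) = 4*0 = 0; broadcast(5, 5) = 5*1 = 5.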
281 | return mul(ctx, max(ctx, x, y), min(ctx, x, min(ctx, y, similar(ctx, y, 1)))); 282 | } 283 | 284 | ShapeTensor product(IImporterContext* ctx, const ShapeTensor& x, int first, int last, int rank) 285 | { 286 | assert(first <= last); 287 | ShapeTensor z(rank, std::vector(1, 1)); 288 | for (int i = first; i < last; ++i) 289 | { 290 | z = mul(ctx, z, gather(ctx, x, ShapeTensor(rank, std::vector(1, i)))); 291 | } 292 | return z; 293 | } 294 | 295 | ShapeTensor concat(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 296 | { 297 | assert(!x.rankKnown() || x.rank() == 1); 298 | assert(!y.rankKnown() || y.rank() == 1); 299 | if (x.sizeKnown() && x.size() == 0) 300 | { 301 | return y; 302 | } 303 | if (y.sizeKnown() && y.size() == 0) 304 | { 305 | return x; 306 | } 307 | if (x.allValuesKnown() && y.allValuesKnown()) 308 | { 309 | std::vector values(x.size() + y.size()); 310 | auto p = std::copy(x.begin(), x.end(), values.begin()); 311 | std::copy(y.begin(), y.end(), p); 312 | return ShapeTensor(1, std::move(values)); 313 | } 314 | 315 | nvinfer1::ITensor* const args[2] = {&x.tensor(ctx), &y.tensor(ctx)}; 316 | return ShapeTensor(*ctx->network()->addConcatenation(args, 2)->getOutput(0)); 317 | } 318 | 319 | ShapeTensor gather(IImporterContext* ctx, const ShapeTensor& data, const ShapeTensor& indices) 320 | { 321 | assert(data.rank() == 1); 322 | if (indices.allValuesKnown() 323 | && std::all_of(indices.begin(), indices.end(), [&data](int i) { return data.valueKnown(i); })) 324 | { 325 | std::vector z(indices.size()); 326 | std::transform(indices.begin(), indices.end(), z.begin(), [&data](int64_t i) { 327 | assert(0 <= i); 328 | assert(i < data.size()); 329 | return data[i]; 330 | }); 331 | return ShapeTensor(indices.rank(), std::move(z)); 332 | } 333 | return ShapeTensor(*ctx->network()->addGather(data.tensor(ctx), indices.tensor(ctx), 0)->getOutput(0)); 334 | } 335 | 336 | ShapeTensor shapeOf(nvinfer1::ITensor& tensor) 337 | { 338 | return ShapeTensor(tensor, 1); 339 | } 340 | 341 | ShapeTensor shapeOf(TensorOrWeights& t) 342 | { 343 | if (t.is_tensor()) 344 | { 345 | return shapeOf(t.tensor()); 346 | } 347 | const nvinfer1::Dims& d = t.weights().shape; 348 | return ShapeTensor(1, std::vector(d.d, d.d + d.nbDims)); 349 | } 350 | 351 | ShapeTensor shapeOf(const ShapeTensor& t) 352 | { 353 | assert(t.mDepth >= 0); 354 | if (t.mTensor) 355 | { 356 | return ShapeTensor(*t.mTensor, t.mDepth + 1); 357 | } 358 | assert(t.rankKnown()); 359 | assert(t.sizeKnown()); 360 | // ShapeTensor is either a scalar or vector. 361 | // shape of a scalar is an empty tensor. 362 | // shape of a vector is a one-element tensor containing the length of the vector. 363 | return t.rank() == 0 ? ShapeTensor(0, {}) : ShapeTensor(1, {t.size()}); 364 | } 365 | 366 | ShapeTensor convertTo1D(IImporterContext* ctx, const ShapeTensor& tensor) 367 | { 368 | assert(tensor.rank() == 0); 369 | assert(tensor.size() == 1); 370 | if (tensor.valueKnown(0)) 371 | { 372 | return shapeScalar(tensor[0]); 373 | } 374 | return ShapeTensor(*addShuffle(ctx, tensor.tensor(ctx), shapeVector(1))->getOutput(0)); 375 | } 376 | 377 | //! If all values of x are known, return Dims with those values, 378 | //! but throw exception if any value is outside specified bounds. 379 | //! Otherwise return Dims with zeros. 380 | //! 381 | //! The string that should describe the context of the dimensions, 382 | //! e.g. "reshape" or "fill output". 
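//! That context string is the `what` parameter, which is spliced into the error message.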
383 | static nvinfer1::Dims toDims(const ShapeTensor& x, const char* what, int32_t minAllowed, int32_t maxAllowed) 384 | { 385 | nvinfer1::Dims d{-1, {}}; 386 | if (x.sizeKnown()) 387 | { 388 | d.nbDims = x.size(); 389 | if (x.allValuesKnown()) 390 | { 391 | assert(x.size() <= nvinfer1::Dims::MAX_DIMS); 392 | for (const auto& dim : x) 393 | { 394 | if (dim < minAllowed || dim > maxAllowed) 395 | { 396 | std::ostringstream msg; 397 | msg << what << " dimensions have value " << dim << " beyond allowed bounds." << std::endl; 398 | throw std::runtime_error(msg.str()); 399 | } 400 | } 401 | std::copy(x.begin(), x.end(), d.d); 402 | } 403 | } 404 | return d; 405 | } 406 | 407 | //! If not all values in x are known, set layer input specifed by inputIndex 408 | //! to tensor with value of x. 409 | static void setShapeInputIfDynamic(IImporterContext* ctx, nvinfer1::ILayer* layer, int inputIndex, const ShapeTensor& x) 410 | { 411 | if (!x.allValuesKnown()) 412 | { 413 | layer->setInput(inputIndex, x.tensor(ctx)); 414 | } 415 | } 416 | 417 | bool operator==(const ShapeTensor& x, const ShapeTensor& y) 418 | { 419 | if (x.allValuesKnown() && y.allValuesKnown()) 420 | { 421 | return x.mValues == y.mValues; 422 | } 423 | assert(x.mTensor || y.mTensor); 424 | return x.mTensor == y.mTensor && x.mDepth == y.mDepth; 425 | } 426 | 427 | nvinfer1::ITensor& reshape(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& newShape) 428 | { 429 | const ShapeTensor oldShape = shapeOf(data); 430 | if (newShape == oldShape) 431 | { 432 | return data; 433 | } 434 | return *addShuffle(ctx, data, newShape)->getOutput(0); 435 | } 436 | 437 | nvinfer1::IShuffleLayer* addShuffle( 438 | IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& reshapeDims, bool zeroIsPlaceholder) 439 | { 440 | nvinfer1::IShuffleLayer* shuffle = ctx->network()->addShuffle(data); 441 | if (reshapeDims.allValuesKnown()) 442 | { 443 | shuffle->setReshapeDimensions(toDims(reshapeDims, "reshape", -1, std::numeric_limits::max())); 444 | } 445 | else 446 | { 447 | shuffle->setInput(1, reshapeDims.tensor(ctx)); 448 | } 449 | shuffle->setZeroIsPlaceholder(zeroIsPlaceholder); 450 | return shuffle; 451 | } 452 | 453 | nvinfer1::ISliceLayer* addSlice(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts, 454 | const ShapeTensor& sizes, const ShapeTensor& strides) 455 | { 456 | constexpr int32_t minDim = std::numeric_limits::min(); 457 | constexpr int32_t maxDim = std::numeric_limits::max(); 458 | nvinfer1::ISliceLayer* slice = ctx->network()->addSlice(data, toDims(starts, "slice start", 0, maxDim), 459 | toDims(sizes, "slice size", 0, maxDim), toDims(strides, "slide strides", minDim, maxDim)); 460 | setShapeInputIfDynamic(ctx, slice, 1, starts); 461 | setShapeInputIfDynamic(ctx, slice, 2, sizes); 462 | setShapeInputIfDynamic(ctx, slice, 3, strides); 463 | return slice; 464 | } 465 | 466 | nvinfer1::IFillLayer* addFill(IImporterContext* ctx, const ShapeTensor& shape, nvinfer1::FillOperation op) 467 | { 468 | nvinfer1::IFillLayer* fill 469 | = ctx->network()->addFill(toDims(shape, "fill output", 0, std::numeric_limits::max()), op); 470 | setShapeInputIfDynamic(ctx, fill, 0, shape); 471 | return fill; 472 | } 473 | 474 | std::ostream& operator<<(std::ostream& stream, const ShapeTensor& x) 475 | { 476 | stream << "("; 477 | for (int i = 0, e = x.size(); i < e; ++i) 478 | { 479 | stream << (i ? 
", " : ""); 480 | if (x.valueKnown(i)) 481 | { 482 | stream << x[i]; 483 | } 484 | else 485 | { 486 | stream << "_"; 487 | } 488 | } 489 | if (x.size() == 1 && x.rank() == 1) 490 | { 491 | // Use Python convention to distinguish 1-element vector from a scalar. 492 | stream << ","; 493 | } 494 | return stream << ")"; 495 | } 496 | 497 | } // namespace onnx2trt 498 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapeTensor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace onnx2trt 13 | { 14 | 15 | class IImporterContext; 16 | class TensorOrWeights; 17 | 18 | //! Represents a 0D or 1D tensor of int64_t. 19 | class ShapeTensor 20 | { 21 | public: 22 | //! Create undefined ShapeTensor. 23 | ShapeTensor() = default; 24 | 25 | //! Create ShapeTensor with known rank and values. 26 | ShapeTensor(int rank_, std::vector&& values_); 27 | 28 | //! Create ShapeTensor representing value of TensorOrWeights. 29 | ShapeTensor(IImporterContext* ctx, TensorOrWeights& t); 30 | 31 | //! Construct ShapeTensor equivalent to applying IShapeLayer depth times. 32 | //! The depth may be in [0,3]. 33 | explicit ShapeTensor(nvinfer1::ITensor& t, int depth = 0); 34 | 35 | //! True if rank is known. 36 | bool rankKnown() const 37 | { 38 | return mRank != kRANK_UNKNOWN; 39 | } 40 | 41 | //! Number of dimensions. Always 0 or 1. 42 | int32_t rank() const 43 | { 44 | assert(rankKnown()); 45 | return mRank; 46 | } 47 | 48 | //! True if number of elements in tensor is known. 49 | bool sizeKnown() const 50 | { 51 | return mSize != kSIZE_UNKNOWN; 52 | } 53 | 54 | //! Number of elements in the tensor. Asserts that sizeKnown()==true. 55 | int32_t size() const 56 | { 57 | assert(sizeKnown()); 58 | return mSize; 59 | } 60 | 61 | //! True if all element values are known. 62 | bool allValuesKnown() const 63 | { 64 | return mAllValuesKnown; 65 | } 66 | 67 | //! True if all element values equal the given value. 68 | bool isAll(int64_t value) const; 69 | 70 | using const_iterator = std::vector::const_iterator; 71 | 72 | //! Iterator pointing to beginning of sequence of element values. 73 | //! Requires that allValuesKnown() is true. 74 | const_iterator begin() const 75 | { 76 | assert(mAllValuesKnown); 77 | return mValues.begin(); 78 | } 79 | 80 | //! Iterator pointing to end of sequence of element values. 81 | //! Requires that allValuesKnown() is true. 82 | const_iterator end() const 83 | { 84 | assert(mAllValuesKnown); 85 | return mValues.end(); 86 | } 87 | 88 | //! True if operator[](k) is valid. 89 | bool valueKnown(int k) const; 90 | 91 | //! Return kth value. 92 | //! For a 0D tensor, k must be 0. 93 | //! Requires that valueKnown(k) is true. 94 | int64_t operator[](int k) const 95 | { 96 | assert(valueKnown(k)); 97 | return mValues[k]; 98 | } 99 | 100 | //! Return true if x and y always have the same value. 101 | friend bool operator==(const ShapeTensor& x, const ShapeTensor& y); 102 | friend ShapeTensor shapeOf(const ShapeTensor& t); 103 | 104 | //! Get TensorRT tensor representation. 105 | nvinfer1::ITensor& tensor(IImporterContext* ctx) const; 106 | 107 | const std::vector& values() const{return mValues;} 108 | void set_values(const std::vector& values){mValues = values;} 109 | 110 | private: 111 | //! 
Number of IShapeLayer to apply to mTensor to get ITensor representing value of *this. 112 | //! -1 for undefined *this, a value in [0,2] otherwise. 113 | //! 0: *this represents value of the tensor (always 0D or 1D) 114 | //! 1: *this represents shape of mTensor (always 1D) 115 | //! 2: *this represents rank of mTensor (always 1D tensor of length 1) 116 | mutable int8_t mDepth{-1}; 117 | 118 | //! True if all values are known. 119 | bool mAllValuesKnown{false}; 120 | 121 | static constexpr int kRANK_UNKNOWN = -1; 122 | static constexpr int kSIZE_UNKNOWN = -1; 123 | 124 | //! Rank of *this. 125 | //! Always -1, 0 or 1. 126 | int8_t mRank{kRANK_UNKNOWN}; 127 | 128 | //! Number of elements in the tensor, or -1 if unknown. 129 | int32_t mSize{kSIZE_UNKNOWN}; 130 | 131 | //! Must be non-null if mAllValuesKnown. 132 | mutable nvinfer1::ITensor* mTensor{nullptr}; 133 | 134 | //! Values of elements if some might be known. 135 | //! mValues.size() is always zero or equal to mSize. 136 | //! When mAllValuesKnown==true, all the values in mValues are correct 137 | //! and mValues.size() == mSize. 138 | //! When mAllValuesKnown==false, only the non-negative values in mValues 139 | //! are guaranteed to be correct, and only so if mValues.size() == mSize. 140 | std::vector mValues; 141 | }; 142 | 143 | //! Print ShapeTensor. Unknown values are printed as _. 144 | std::ostream& operator<<(std::ostream& stream, const ShapeTensor& x); 145 | 146 | //! Create 1D ShapeTensor of length n filled with value. 147 | //! count must be 1D ShapeTensor of size 1. 148 | ShapeTensor fillShapeVector(IImporterContext* ctx, int64_t value, const ShapeTensor& count); 149 | 150 | //! Create 1D ShapeTensor of length 1 containing given value. 151 | ShapeTensor shapeVector(int64_t value); 152 | 153 | //! Create 0D ShapeTensor containing the given value. 154 | ShapeTensor shapeScalar(int64_t value); 155 | 156 | //! Create 1D ShapeTensor containing [0,n). 157 | ShapeTensor iotaShapeVector(int32_t n); 158 | 159 | //! Create ShapeTensor filled with value that has same shape as exemplar. 160 | //! The exemplar must be 1D. 161 | ShapeTensor similar(IImporterContext* ctx, const ShapeTensor& exemplar, int64_t value); 162 | 163 | //! Elementwise addition 164 | ShapeTensor add(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 165 | 166 | //! Elementwise subtraction 167 | ShapeTensor sub(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 168 | 169 | //! Elementwise multiplication 170 | ShapeTensor mul(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 171 | 172 | //! Elementwise min 173 | ShapeTensor min(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 174 | 175 | //! Elementwise max 176 | ShapeTensor max(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 177 | 178 | //! Elementwise floor division 179 | ShapeTensor floorDiv(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 180 | 181 | //! Elementwise f, for a partial function f defined by: 182 | //! f(x,x) = x 183 | //! f(1,x) = x 184 | //! f(x,1) = x 185 | //! Undefined otherwise or if x < 0. 186 | ShapeTensor broadcast(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 187 | 188 | //! Return product of x[i] for i in [first..last), as 0D or one-element 1D tensor of given rank. 189 | ShapeTensor product(IImporterContext* ctx, const ShapeTensor& x, int first, int last, int rank); 190 | 191 | //! 
Gather where data is 1D tensor and indices can be 0D or 1D 192 | ShapeTensor gather(IImporterContext* ctx, const ShapeTensor& data, const ShapeTensor& indices); 193 | 194 | //! Concatenation of two 1D tensors 195 | ShapeTensor concat(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 196 | 197 | //! Return gather(concat(x,y),subscripts) 198 | inline ShapeTensor interlace( 199 | IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y, const ShapeTensor& subscripts) 200 | { 201 | return gather(ctx, concat(ctx, x, y), subscripts); 202 | } 203 | 204 | //! Return shape of a tensor. 205 | ShapeTensor shapeOf(nvinfer1::ITensor& tensor); 206 | ShapeTensor shapeOf(const ShapeTensor& tensor); 207 | ShapeTensor shapeOf(TensorOrWeights& t); 208 | 209 | //! Reshape 0D tensor to 1D tensor. 210 | ShapeTensor convertTo1D(IImporterContext* ctx, const ShapeTensor& tensor); 211 | 212 | //! Add an ISliceLayer. 213 | nvinfer1::ISliceLayer* addSlice(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts, 214 | const ShapeTensor& sizes, const ShapeTensor& strides); 215 | 216 | //! Add an IShuffleLayer. 217 | //! If the result does not need to have its parameters changed, and 218 | //! optimizing the no-op case away is okay, use function reshape instead. 219 | //! 220 | //! In general the default zeroIsPlaceholder=false should be used so 221 | //! that reshaping to empty tensors works correctly. Calling with 222 | //! zeroIsPlaceholder=true should happen only when replicating the 223 | //! semantics of the ONNX Reshape operator. 224 | nvinfer1::IShuffleLayer* addShuffle( 225 | IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& reshapeDims, bool zeroIsPlaceholder = false); 226 | 227 | //! Add an IFillLayer. 228 | nvinfer1::IFillLayer* addFill(IImporterContext* ctx, const ShapeTensor& shape, nvinfer1::FillOperation op); 229 | 230 | //! Reshape a tensor. 231 | //! 232 | //! Treats any zeros in newShape as dimensions, not placeholders. 233 | //! Implementation note: does not insert shuffle if it's a no-op. 234 | nvinfer1::ITensor& reshape(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& newShape); 235 | 236 | } // namespace onnx2trt 237 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapedWeights.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "ShapedWeights.hpp" 6 | #include "onnx2trt_utils.hpp" 7 | #include "trt_utils.hpp" 8 | #include 9 | #include 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | size_t ShapedWeights::count() const 15 | { 16 | if (this->values == nullptr && this->shape.nbDims <= 0) 17 | { 18 | return 0; 19 | } 20 | // TRT supports scalars, so 0D tensors should have a count of 1. 
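    // Added illustrative note (not in the original source): shape {} (nbDims == 0) with non-null
    // values yields a count of 1; shape {2, 3} yields 6; the null-values case with nbDims <= 0
    // already returned 0 above.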
21 | size_t c = 1; 22 | for (int i = 0; i < this->shape.nbDims; ++i) 23 | { 24 | c *= this->shape.d[i]; 25 | } 26 | return c; 27 | } 28 | 29 | ShapedWeights ShapedWeights::empty(DataType type) 30 | { 31 | return ShapedWeights(type, nullptr, nvinfer1::Dims{0}); 32 | } 33 | 34 | ShapedWeights::ShapedWeights() 35 | : values(nullptr) 36 | , shape{0} 37 | { 38 | } 39 | 40 | ShapedWeights::ShapedWeights(DataType type_, void* values_, nvinfer1::Dims shape_) 41 | : type(type_) 42 | , values(values_) 43 | , shape(shape_) 44 | { 45 | // Note: this->shape.type[] is not used 46 | } 47 | 48 | size_t ShapedWeights::size_bytes() const 49 | { 50 | return this->count() * getDtypeSize(this->type); 51 | } 52 | 53 | ShapedWeights::operator bool() const 54 | { 55 | return (bool) this->values; 56 | } 57 | 58 | ShapedWeights::operator nvinfer1::Weights() const 59 | { 60 | nvinfer1::Weights w{}; 61 | w.values = this->values; 62 | bool supported_type = convertDtype(this->type, &w.type); 63 | (void) supported_type; 64 | assert(supported_type); 65 | w.count = this->count(); 66 | return w; 67 | } 68 | 69 | const char* ShapedWeights::getName() const 70 | { 71 | return this->name; 72 | } 73 | 74 | void ShapedWeights::setName(const char* name) 75 | { 76 | this->name = name; 77 | } 78 | 79 | template 80 | void transpose4DWeights(ShapedWeights const& weights, nvinfer1::Permutation const perm, ShapedWeights* result) 81 | { 82 | nvinfer1::Dims original_shape = weights.shape; 83 | nvinfer1::Dims new_shape = result->shape; 84 | int nbDims = new_shape.nbDims; 85 | DType const* src = reinterpret_cast(weights.values); 86 | DType* dst = reinterpret_cast(result->values); 87 | 88 | nvinfer1::Dims expanded_original_shape{4, {1, 1, 1, 1}}; 89 | nvinfer1::Dims expanded_new_shape{4, {1, 1, 1, 1}}; 90 | nvinfer1::Permutation expanded_perm{0, 1, 2, 3}; 91 | 92 | int pad = 4 - nbDims; 93 | for (int i = 0; i < nbDims; ++i) 94 | { 95 | expanded_original_shape.d[pad + i] = original_shape.d[i]; 96 | expanded_new_shape.d[pad + i] = new_shape.d[i]; 97 | expanded_perm.order[pad + i] = perm.order[i] + pad; 98 | } 99 | 100 | 101 | int src_strides[4] = {1, 1, 1, 1}; 102 | int dst_strides[4] = {1, 1, 1, 1}; 103 | 104 | for (int i = 2; i >= 0; --i) 105 | { 106 | src_strides[i] = expanded_original_shape.d[i + 1] * src_strides[i + 1]; 107 | dst_strides[i] = expanded_new_shape.d[i + 1] * dst_strides[i + 1]; 108 | } 109 | 110 | for (int n = 0; n < expanded_original_shape.d[0]; ++n) 111 | { 112 | for (int c = 0; c < expanded_original_shape.d[1]; ++c) 113 | { 114 | for (int h = 0; h < expanded_original_shape.d[2]; ++h) 115 | { 116 | for (int w = 0; w < expanded_original_shape.d[3]; ++w) 117 | { 118 | int src_index = 0; 119 | int dst_index = 0; 120 | int src_coord[4] = {n, c, h, w}; 121 | int dst_coord[4]; 122 | for (int i = 0 ; i < 4; ++i) 123 | { 124 | dst_coord[i] = src_coord[expanded_perm.order[i]]; 125 | src_index += src_coord[i] * src_strides[i]; 126 | dst_index += dst_coord[i] * dst_strides[i]; 127 | } 128 | dst[dst_index] = src[src_index]; 129 | } 130 | } 131 | } 132 | } 133 | } 134 | 135 | bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& perm, ShapedWeights* result, IImporterContext* ctx) 136 | { 137 | nvinfer1::Dims shape = weights.shape; 138 | int nbDims = shape.nbDims; 139 | nvinfer1::Dims new_shape; 140 | new_shape.nbDims = nbDims; 141 | for (int d = 0; d < nbDims; ++d) 142 | { 143 | new_shape.d[d] = shape.d[perm.order[d]]; 144 | result->shape.d[d] = new_shape.d[d]; 145 | } 146 | 147 | if (shape.nbDims <= 4) 148 
| { 149 | if (weights.type == ::onnx::TensorProto::FLOAT) 150 | { 151 | transpose4DWeights(weights, perm, result); 152 | } 153 | else if (weights.type == ::onnx::TensorProto::FLOAT16) 154 | { 155 | transpose4DWeights(weights, perm, result); 156 | } 157 | else 158 | { 159 | return false; 160 | } 161 | } 162 | else 163 | { 164 | // TODO: Implement general transposes and multiple data types 165 | // Unsupported weights transpose 166 | return false; 167 | } 168 | nvinfer1::Dims permDims{nbDims, {}}; 169 | std::copy_n(perm.order, nbDims, permDims.d); 170 | LOG_WARNING("Weights " 171 | << weights.getName() << " has been transposed with permutation of " << permDims 172 | << "! If you plan on overwriting the weights with the Refitter API, the new weights must be pre-transposed."); 173 | result->setName(weights.getName()); 174 | return true; 175 | } 176 | 177 | } // namespace onnx2trt 178 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapedWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | class ShapedWeights 14 | { 15 | public: 16 | using DataType = int32_t; 17 | 18 | static ShapedWeights empty(DataType type); 19 | 20 | ShapedWeights(); 21 | 22 | explicit ShapedWeights(DataType type, void* values, nvinfer1::Dims shape_); 23 | 24 | size_t count() const; 25 | 26 | size_t size_bytes() const; 27 | 28 | const char* getName() const; 29 | 30 | void setName(const char* name); 31 | 32 | explicit operator bool() const; 33 | 34 | operator nvinfer1::Weights() const; 35 | 36 | template 37 | T& at(size_t index) 38 | { 39 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 40 | return static_cast(values)[index]; 41 | } 42 | 43 | template 44 | const T& at(size_t index) const 45 | { 46 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 47 | return static_cast(values)[index]; 48 | } 49 | 50 | public: 51 | DataType type; 52 | void* values; 53 | nvinfer1::Dims shape; 54 | const char* name{}; 55 | }; 56 | 57 | class IImporterContext; 58 | bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& perm, ShapedWeights* result, IImporterContext* ctx); 59 | 60 | } // namespace onnx2trt 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/Status.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvOnnxParser.h" 8 | 9 | #include 10 | #include 11 | 12 | // Used to strip out build path information from debug prints 13 | #if defined(SOURCE_LENGTH) 14 | #define __FILENAME__ (__FILE__ + SOURCE_LENGTH) 15 | #else 16 | #define __FILENAME__ (__FILE__) 17 | #endif 18 | 19 | #define MAKE_ERROR(desc, code) onnx2trt::Status((code), (desc), __FILENAME__, __LINE__, __func__) 20 | 21 | #define ASSERT(condition, error_code) \ 22 | do \ 23 | { \ 24 | if (!(condition)) \ 25 | { \ 26 | return MAKE_ERROR("Assertion failed: " #condition, (error_code)); \ 27 | } \ 28 | } while (0) 29 | 30 | #define MAKE_INPUT_ERROR(desc, code, name) Status((code), (desc), name, __LINE__, __func__) 31 | 32 | #define ASSERT_INPUT(condition, error_code, name) \ 33 | do \ 34 | { \ 35 | if (!(condition)) \ 36 | { \ 37 | return MAKE_INPUT_ERROR("Assertion failed: " #condition, (error_code), 
(name)); \ 38 | } \ 39 | } while (0) 40 | 41 | #define ASSERT_C(condition, error_code) \ 42 | do \ 43 | { \ 44 | if (!(condition)) \ 45 | { \ 46 | return error_code; \ 47 | } \ 48 | } while (0) 49 | 50 | #define GET_VALUE(value_or_error_, result_ptr) \ 51 | do \ 52 | { \ 53 | auto const& value_or_error = value_or_error_; \ 54 | if (value_or_error.is_error()) \ 55 | { \ 56 | return value_or_error.error(); \ 57 | } \ 58 | else \ 59 | { \ 60 | *result_ptr = value_or_error.value(); \ 61 | } \ 62 | } while (0) 63 | 64 | #define CHECK(call) \ 65 | do \ 66 | { \ 67 | Status status = call; \ 68 | if (!status.is_success()) \ 69 | { \ 70 | return status; \ 71 | } \ 72 | } while (0) 73 | 74 | namespace onnx2trt 75 | { 76 | 77 | using nvonnxparser::ErrorCode; 78 | 79 | class Status : public nvonnxparser::IParserError 80 | { 81 | ErrorCode _code; 82 | std::string _desc; 83 | std::string _file; 84 | int _line; 85 | std::string _func; 86 | int _node; 87 | 88 | public: 89 | static Status success() 90 | { 91 | return Status(ErrorCode::kSUCCESS); 92 | } 93 | Status() 94 | { 95 | } 96 | explicit Status(ErrorCode code, std::string desc = "", std::string file = "", int line = 0, std::string func = "", 97 | int node = -1) 98 | : _code(code) 99 | , _desc(desc) 100 | , _file(file) 101 | , _line(line) 102 | , _func(func) 103 | , _node(node) 104 | { 105 | } 106 | ErrorCode code() const override 107 | { 108 | return _code; 109 | } 110 | const char* desc() const override 111 | { 112 | return _desc.c_str(); 113 | } 114 | const char* file() const override 115 | { 116 | return _file.c_str(); 117 | } 118 | int line() const override 119 | { 120 | return _line; 121 | } 122 | const char* func() const override 123 | { 124 | return _func.c_str(); 125 | } 126 | int node() const override 127 | { 128 | return _node; 129 | } 130 | bool is_error() const 131 | { 132 | return _code != ErrorCode::kSUCCESS; 133 | } 134 | bool is_success() const 135 | { 136 | return _code == ErrorCode::kSUCCESS; 137 | } 138 | void setNode(int node) 139 | { 140 | _node = node; 141 | } 142 | }; 143 | 144 | template 145 | class ValueOrStatus 146 | { 147 | bool _is_error; 148 | T _value; 149 | Status _error; 150 | 151 | public: 152 | ValueOrStatus(T const& value) 153 | : _is_error(false) 154 | , _value(value) 155 | , _error(Status::success()) 156 | { 157 | } 158 | ValueOrStatus(T&& value) 159 | : _is_error(false) 160 | , _value(value) 161 | , _error(Status::success()) 162 | { 163 | } 164 | ValueOrStatus(Status const& error) 165 | : _is_error(true) 166 | , _error(error) 167 | { 168 | } 169 | ValueOrStatus(Status&& error) 170 | : _is_error(true) 171 | , _error(error) 172 | { 173 | } 174 | bool is_error() const 175 | { 176 | return _is_error; 177 | } 178 | T const& value() const 179 | { 180 | assert(!_is_error); 181 | return _value; 182 | } 183 | T& value() 184 | { 185 | assert(!_is_error); 186 | return _value; 187 | } 188 | Status const& error() const 189 | { 190 | assert(_is_error); 191 | return _error; 192 | } 193 | }; 194 | 195 | } // namespace onnx2trt 196 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/TensorOrWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "ShapedWeights.hpp" 8 | 9 | #include 10 | #include 11 | 12 | namespace onnx2trt 13 | { 14 | 15 | class TensorOrWeights 16 | { 17 | union 18 | { 19 | nvinfer1::ITensor* _tensor; 20 | ShapedWeights 
_weights; 21 | }; 22 | enum 23 | { 24 | NODE_TENSOR, 25 | NODE_WEIGHTS 26 | } _variant; 27 | 28 | public: 29 | TensorOrWeights() 30 | : _tensor(nullptr) 31 | , _variant(NODE_TENSOR) 32 | { 33 | } 34 | TensorOrWeights(nvinfer1::ITensor* tensor) 35 | : _tensor(tensor) 36 | , _variant(NODE_TENSOR) 37 | { 38 | } 39 | TensorOrWeights(ShapedWeights const& weights) 40 | : _weights(weights) 41 | , _variant(NODE_WEIGHTS) 42 | { 43 | } 44 | bool is_tensor() const 45 | { 46 | return _variant == NODE_TENSOR; 47 | } 48 | bool is_weights() const 49 | { 50 | return _variant == NODE_WEIGHTS; 51 | } 52 | bool isNullTensor() const 53 | { 54 | return is_tensor() && _tensor == nullptr; 55 | } 56 | nvinfer1::ITensor& tensor() 57 | { 58 | assert(!isNullTensor()); 59 | return *_tensor; 60 | } 61 | nvinfer1::ITensor const& tensor() const 62 | { 63 | assert(!isNullTensor()); 64 | return *_tensor; 65 | } 66 | ShapedWeights& weights() 67 | { 68 | assert(is_weights()); 69 | return _weights; 70 | } 71 | ShapedWeights const& weights() const 72 | { 73 | assert(is_weights()); 74 | return _weights; 75 | } 76 | nvinfer1::Dims shape() const 77 | { 78 | return is_tensor() ? _tensor->getDimensions() : _weights.shape; 79 | } 80 | explicit operator bool() const 81 | { 82 | return is_tensor() ? _tensor != nullptr : static_cast(_weights); 83 | } 84 | bool isInt32() const 85 | { 86 | return is_tensor() ? _tensor->getType() == nvinfer1::DataType::kINT32 : _weights.type == ::onnx::TensorProto_DataType_INT32; 87 | } 88 | bool isBool() const 89 | { 90 | return is_tensor() ? _tensor->getType() == nvinfer1::DataType::kBOOL : _weights.type == ::onnx::TensorProto_DataType_BOOL; 91 | } 92 | std::string getName() const 93 | { 94 | return is_tensor() ? _tensor->getName() : _weights.getName(); 95 | } 96 | std::string getType() const 97 | { 98 | if (is_tensor()) 99 | { 100 | switch(_tensor->getType()) 101 | { 102 | case nvinfer1::DataType::kFLOAT:return "FLOAT"; 103 | case nvinfer1::DataType::kHALF: return "HALF"; 104 | case nvinfer1::DataType::kINT8: return "INT8"; 105 | case nvinfer1::DataType::kINT32: return "INT32"; 106 | case nvinfer1::DataType::kBOOL: return "BOOL"; 107 | default: return "UNKNOWN TYPE"; 108 | } 109 | } 110 | else 111 | { 112 | switch(_weights.type) 113 | { 114 | case ::onnx::TensorProto::DOUBLE: return "DOUBLE -> FLOAT"; 115 | case ::onnx::TensorProto::FLOAT: return "FLOAT"; 116 | case ::onnx::TensorProto::INT8: return "INT8"; 117 | case ::onnx::TensorProto::FLOAT16: return "HALF"; 118 | case ::onnx::TensorProto::BOOL: return "BOOL"; 119 | case ::onnx::TensorProto::INT32: return "INT32"; 120 | case ::onnx::TensorProto::INT64: return "INT64 -> INT32"; 121 | default: return "UNKNOWN TYPE"; 122 | } 123 | } 124 | } 125 | }; 126 | 127 | } // namespace onnx2trt 128 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/builtin_op_importers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt.hpp" 8 | #include "utils.hpp" 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | string_map& getBuiltinOpImporterMap(); 14 | 15 | } // namespace onnx2trt 16 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 
"NvOnnxParser.h" 8 | #include "ShapedWeights.hpp" 9 | #include "Status.hpp" 10 | #include "TensorOrWeights.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace onnx2trt 21 | { 22 | 23 | class IImporterContext; 24 | 25 | // TODO: Find ABI-safe alternative approach for this: 26 | // Can't use std::vector 27 | // Can't use ::onnx::NodeProto 28 | // Can't use std::function 29 | typedef ValueOrStatus> NodeImportResult; 30 | typedef std::function& inputs)> 32 | NodeImporter; 33 | 34 | template 35 | using StringMap = std::unordered_map; 36 | 37 | class IImporterContext 38 | { 39 | public: 40 | virtual nvinfer1::INetworkDefinition* network() = 0; 41 | virtual StringMap& tensors() = 0; 42 | virtual StringMap& tensorLocations() = 0; 43 | virtual StringMap& tensorRangeMins() = 0; 44 | virtual StringMap& tensorRangeMaxes() = 0; 45 | virtual StringMap& layerPrecisions() = 0; 46 | virtual std::unordered_set& unsupportedShapeTensors() = 0; 47 | virtual StringMap& loopTensors() = 0; 48 | virtual void setOnnxFileLocation(std::string location) = 0; 49 | virtual std::string getOnnxFileLocation() = 0; 50 | virtual void registerTensor(TensorOrWeights tensor, const std::string& basename) = 0; 51 | virtual void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) = 0; 52 | virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) = 0; 53 | virtual int64_t getOpsetVersion(const char* domain = "") const = 0; 54 | virtual nvinfer1::ILogger& logger() = 0; 55 | virtual bool hasError() const = 0; 56 | virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; 57 | 58 | protected: 59 | virtual ~IImporterContext() 60 | { 61 | } 62 | }; 63 | 64 | } // namespace onnx2trt 65 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #if NV_TENSORRT_MAJOR < 4 11 | namespace nvinfer1 12 | { 13 | 14 | enum class PluginFormat : uint8_t 15 | { 16 | kNCHW = 0, //!< NCHW 17 | kNC2HW2 = 1, //!< NCHW with 2-element packed channels 18 | kNHWC8 = 2 //!< NHWC with 8-element packed channels (C 19 | //! 
must be a multiple of 8) 20 | }; 21 | // from NvInfer.h 22 | class IPluginExt : public IPlugin 23 | { 24 | public: 25 | virtual int getTensorRTVersion() const noexcept 26 | { 27 | return NV_TENSORRT_VERSION; 28 | } 29 | virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0; 30 | virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, 31 | DataType type, PluginFormat format, int maxBatchSize) noexcept 32 | = 0; 33 | 34 | protected: 35 | void configure( 36 | const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept final 37 | { 38 | try 39 | { 40 | DataType type = nvinfer1::DataType::kFLOAT; 41 | PluginFormat format = nvinfer1::PluginFormat::kLINEAR; 42 | return this->configureWithFormat(inputDims, nbInputs, outputDims, nbOutputs, type, format, maxBatchSize); 43 | } 44 | catch (const std::exception& e) 45 | { 46 | nvinfer1::getLogger()->log(nvinfer1::ILogger::Severity::kERROR, e.what().c_str()); 47 | } 48 | } 49 | virtual ~IPluginExt() 50 | { 51 | } 52 | }; 53 | 54 | } // namespace nvinfer1 55 | #endif 56 | 57 | namespace onnx2trt 58 | { 59 | 60 | struct IOwnable 61 | { 62 | virtual void destroy() = 0; 63 | 64 | protected: 65 | virtual ~IOwnable() 66 | { 67 | } 68 | }; 69 | 70 | struct OwnableDeleter 71 | { 72 | void operator()(IOwnable* obj) const 73 | { 74 | obj->destroy(); 75 | } 76 | }; 77 | 78 | using UniqueOwnable = std::unique_ptr; 79 | class Plugin; 80 | class PluginV2; 81 | 82 | } // namespace onnx2trt 83 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_runtime.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt_common.hpp" 8 | 9 | namespace onnx2trt 10 | { 11 | 12 | typedef Plugin* (*plugin_deserializer)(const void* serialData, size_t serialLength); 13 | 14 | } // namespace onnx2trt 15 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "onnxErrorRecorder.hpp" 6 | #include 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | 12 | ONNXParserErrorRecorder* ONNXParserErrorRecorder::create( 13 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 14 | { 15 | try 16 | { 17 | auto recorder = new ONNXParserErrorRecorder(logger, otherRecorder); 18 | if (recorder) 19 | { 20 | recorder->incRefCount(); 21 | } 22 | return recorder; 23 | } 24 | catch (const std::exception& e) 25 | { 26 | logError(logger, e.what()); 27 | return nullptr; 28 | } 29 | } 30 | 31 | void ONNXParserErrorRecorder::destroy(ONNXParserErrorRecorder*& recorder) 32 | { 33 | if (recorder) 34 | { 35 | recorder->decRefCount(); 36 | recorder = nullptr; 37 | } 38 | } 39 | 40 | void ONNXParserErrorRecorder::logError(nvinfer1::ILogger* logger, const char* str) 41 | { 42 | if (logger) 43 | { 44 | logger->log(ILogger::Severity::kERROR, str); 45 | } 46 | } 47 | 48 | ONNXParserErrorRecorder::ONNXParserErrorRecorder( 49 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 50 | : mUserRecorder(otherRecorder) 51 | , mLogger(logger) 52 | { 53 | if (mUserRecorder) 54 | { 55 | mUserRecorder->incRefCount(); 56 | } 57 | } 58 | 59 | 
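// Added lifecycle note (not in the original source): create() above returns a recorder whose
// reference count is already 1; callers release it via destroy(), and decRefCount() below
// deletes the object once the count reaches zero.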
ONNXParserErrorRecorder::~ONNXParserErrorRecorder() noexcept 60 | { 61 | if (mUserRecorder) 62 | { 63 | mUserRecorder->decRefCount(); 64 | } 65 | } 66 | 67 | void ONNXParserErrorRecorder::clear() noexcept 68 | { 69 | try 70 | { 71 | // grab a lock so that there is no addition while clearing. 72 | std::lock_guard guard(mStackLock); 73 | mErrorStack.clear(); 74 | } 75 | catch (const std::exception& e) 76 | { 77 | logError(mLogger, e.what()); 78 | } 79 | }; 80 | 81 | bool ONNXParserErrorRecorder::reportError( 82 | nvinfer1::ErrorCode val, nvinfer1::IErrorRecorder::ErrorDesc desc) noexcept 83 | { 84 | try 85 | { 86 | std::lock_guard guard(mStackLock); 87 | mErrorStack.push_back(errorPair(val, desc)); 88 | if (mUserRecorder) 89 | { 90 | mUserRecorder->reportError(val, desc); 91 | } 92 | else 93 | { 94 | logError(mLogger, desc); 95 | } 96 | } 97 | catch (const std::exception& e) 98 | { 99 | logError(mLogger, e.what()); 100 | } 101 | // All errors are considered fatal. 102 | return true; 103 | } 104 | 105 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::incRefCount() noexcept 106 | { 107 | // Atomically increment or decrement the ref counter. 108 | return ++mRefCount; 109 | } 110 | 111 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::decRefCount() noexcept 112 | { 113 | auto newVal = --mRefCount; 114 | if (newVal == 0) 115 | { 116 | delete this; 117 | } 118 | return newVal; 119 | } 120 | 121 | } // namespace onnx2trt 122 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvInferRuntimeCommon.h" 8 | #include "onnx2trt_utils.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace onnx2trt 16 | { 17 | 18 | //! 19 | //! A simple implementation of the IErrorRecorder interface for 20 | //! use by ONNX importer. 21 | //! ONNX-importer Error recorder is based on a vector that pairs the error 22 | //! code and the error string into a single element. It also uses 23 | //! standard mutex and atomics in order to make sure that the code 24 | //! works in a multi-threaded environment. 25 | //! 26 | class ONNXParserErrorRecorder : public nvinfer1::IErrorRecorder 27 | { 28 | using RefCount = nvinfer1::IErrorRecorder::RefCount; 29 | using ErrorDesc = nvinfer1::IErrorRecorder::ErrorDesc; 30 | using ErrorCode = nvinfer1::ErrorCode; 31 | using IErrorRecorder = nvinfer1::IErrorRecorder; 32 | using ILogger = nvinfer1::ILogger; 33 | 34 | using errorPair = std::pair; 35 | using errorStack = std::vector; 36 | 37 | public: 38 | static ONNXParserErrorRecorder* create( 39 | ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 40 | 41 | static void destroy(ONNXParserErrorRecorder*& recorder); 42 | 43 | void clear() noexcept final; 44 | RefCount incRefCount() noexcept final; 45 | RefCount decRefCount() noexcept final; 46 | bool reportError(ErrorCode val, ErrorDesc desc) noexcept final; 47 | 48 | int32_t getNbErrors() const noexcept final 49 | { 50 | return mErrorStack.size(); 51 | } 52 | 53 | ErrorCode getErrorCode(int32_t errorIdx) const noexcept final 54 | { 55 | return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; 56 | } 57 | 58 | ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final 59 | { 60 | return invalidIndexCheck(errorIdx) ? 
"errorIdx out of range." : (*this)[errorIdx].second.c_str(); 61 | } 62 | 63 | bool hasOverflowed() const noexcept final 64 | { 65 | // This class can never overflow since we have dynamic resize via std::vector usage. 66 | return false; 67 | } 68 | 69 | protected: 70 | ONNXParserErrorRecorder(ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 71 | 72 | virtual ~ONNXParserErrorRecorder() noexcept; 73 | 74 | static void logError(ILogger* logger, const char* str); 75 | 76 | // Simple helper functions. 77 | const errorPair& operator[](size_t index) const noexcept 78 | { 79 | return mErrorStack[index]; 80 | } 81 | 82 | bool invalidIndexCheck(int32_t index) const noexcept 83 | { 84 | // By converting signed to unsigned, we only need a single check since 85 | // negative numbers turn into large positive greater than the size. 86 | size_t sIndex = index; 87 | return sIndex >= mErrorStack.size(); 88 | } 89 | // Mutex to hold when locking mErrorStack. 90 | std::mutex mStackLock; 91 | 92 | // Reference count of the class. Destruction of the class when mRefCount 93 | // is not zero causes undefined behavior. 94 | std::atomic mRefCount{0}; 95 | 96 | // The error stack that holds the errors recorded by TensorRT. 97 | errorStack mErrorStack; 98 | 99 | // Original error recorder (set by user) 100 | IErrorRecorder* mUserRecorder{nullptr}; 101 | 102 | // logger 103 | ILogger* mLogger{nullptr}; 104 | }; // class ONNXParserErrorRecorder 105 | 106 | } // namespace onnx2trt 107 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #pragma once 14 | 15 | namespace 16 | { 17 | 18 | template 19 | bool convertOnnxDims(OnnxDims const& onnxDims, nvinfer1::Dims& trtDims) 20 | { 21 | std::vector onnxDims_vector; 22 | for (const auto& onnxDim : onnxDims) 23 | { 24 | const int dim = onnxDim.dim_param() == "" ? (onnxDim.dim_value() >= 0 ? onnxDim.dim_value() : -1) : -1; 25 | onnxDims_vector.emplace_back(dim); 26 | } 27 | trtDims.nbDims = onnxDims_vector.size(); 28 | assert(trtDims.nbDims <= nvinfer1::Dims::MAX_DIMS); 29 | std::copy(onnxDims_vector.begin(), onnxDims_vector.end(), trtDims.d); 30 | return true; 31 | } 32 | 33 | // Removes raw data from the text representation of an ONNX model 34 | void remove_raw_data_strings(std::string& s) 35 | { 36 | std::string::size_type beg = 0; 37 | const std::string key = "raw_data: \""; 38 | const std::string sub = "..."; 39 | while ((beg = s.find(key, beg)) != std::string::npos) 40 | { 41 | beg += key.length(); 42 | std::string::size_type end = beg - 1; 43 | // Note: Must skip over escaped end-quotes 44 | while (s[(end = s.find("\"", ++end)) - 1] == '\\') 45 | { 46 | } 47 | if (end - beg > 128) 48 | { // Only remove large data strings 49 | s.replace(beg, end - beg, "..."); 50 | } 51 | beg += sub.length(); 52 | } 53 | } 54 | 55 | // Removes float_data, int32_data etc. 
from the text representation of an ONNX model 56 | std::string remove_repeated_data_strings(std::string& s) 57 | { 58 | std::istringstream iss(s); 59 | std::ostringstream oss; 60 | bool is_repeat = false; 61 | for (std::string line; std::getline(iss, line);) 62 | { 63 | if (line.find("float_data:") != std::string::npos || line.find("int32_data:") != std::string::npos 64 | || line.find("int64_data:") != std::string::npos) 65 | { 66 | if (!is_repeat) 67 | { 68 | is_repeat = true; 69 | oss << line.substr(0, line.find(":") + 1) << " ...\n"; 70 | } 71 | } 72 | else 73 | { 74 | is_repeat = false; 75 | oss << line << "\n"; 76 | } 77 | } 78 | return oss.str(); 79 | } 80 | 81 | } // anonymous namespace 82 | 83 | inline std::string pretty_print_onnx_to_string(::google::protobuf::Message const& message) 84 | { 85 | std::string s; 86 | ::google::protobuf::TextFormat::PrintToString(message, &s); 87 | remove_raw_data_strings(s); 88 | s = remove_repeated_data_strings(s); 89 | return s; 90 | } 91 | 92 | inline std::ostream& operator<<(std::ostream& stream, ::onnx::ModelProto const& message) 93 | { 94 | stream << pretty_print_onnx_to_string(message); 95 | return stream; 96 | } 97 | 98 | inline std::ostream& operator<<(std::ostream& stream, ::onnx::NodeProto const& message) 99 | { 100 | stream << pretty_print_onnx_to_string(message); 101 | return stream; 102 | } 103 | 104 | //... 105 | //...Consider moving all of the below functions into a stand alone 106 | //... 107 | 108 | inline bool ParseFromFile_WAR(google::protobuf::Message* msg, const char* filename) 109 | { 110 | 111 | std::ifstream stream(filename, std::ios::in | std::ios::binary); 112 | if (!stream) 113 | { 114 | std::cerr << "Could not open file " << std::string(filename) << std::endl; 115 | return false; 116 | } 117 | google::protobuf::io::IstreamInputStream rawInput(&stream); 118 | 119 | google::protobuf::io::CodedInputStream coded_input(&rawInput); 120 | // Note: This WARs the very low default size limit (64MB) 121 | coded_input.SetTotalBytesLimit(std::numeric_limits::max()); 122 | return msg->ParseFromCodedStream(&coded_input); 123 | } 124 | 125 | inline bool ParseFromTextFile(google::protobuf::Message* msg, const char* filename) 126 | { 127 | std::ifstream stream(filename, std::ios::in); 128 | if (!stream) 129 | { 130 | std::cerr << "Could not open file " << std::string(filename) << std::endl; 131 | return false; 132 | } 133 | 134 | google::protobuf::io::IstreamInputStream rawInput(&stream); 135 | 136 | return google::protobuf::TextFormat::Parse(&rawInput, msg); 137 | } 138 | 139 | inline std::string onnx_ir_version_string(int64_t ir_version = ::onnx::IR_VERSION) 140 | { 141 | int onnx_ir_major = ir_version / 1000000; 142 | int onnx_ir_minor = ir_version % 1000000 / 10000; 143 | int onnx_ir_patch = ir_version % 10000; 144 | return (std::to_string(onnx_ir_major) + "." + std::to_string(onnx_ir_minor) + "." 
+ std::to_string(onnx_ir_patch));
145 | }
146 | 
--------------------------------------------------------------------------------
/src/tensorRT/onnx_parser/readme.md:
--------------------------------------------------------------------------------
1 | # ONNX Parser
2 | - These files are extracted from the official onnx-tensorrt project; the Python-related parts are removed, everything else is kept
3 | - Support for Plugin nodes has also been added
4 | - https://github.com/onnx/onnx-tensorrt
--------------------------------------------------------------------------------
/src/tensorRT/onnx_parser/toposort.hpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  */
4 | 
5 | #pragma once
6 | 
7 | #include <unordered_map>
8 | #include <vector>
9 | 
10 | #include <iostream>
11 | using std::cout;
12 | using std::cerr;
13 | using std::endl;
14 | 
15 | namespace
16 | {
17 | 
18 | enum NodeState
19 | {
20 |     NODE_UNVISITED,
21 |     NODE_ACTIVE,
22 |     NODE_VISITED
23 | };
24 | 
25 | template <class Container>
26 | bool get_post_order(size_t node_idx, Container const& nodes, std::unordered_map<std::string, size_t> const& node_map,
27 |     std::vector<NodeState>* node_states, std::vector<size_t>* order)
28 | {
29 |     NodeState& node_state = node_states->at(node_idx);
30 |     if (node_state == NODE_ACTIVE)
31 |     {
32 |         // Cycle detected!
33 |         cerr << "ERROR: Graph contains a cycle" << endl;
34 |         return false;
35 |     }
36 |     else if (node_state == NODE_VISITED)
37 |     {
38 |         return true;
39 |     }
40 |     else
41 |     {
42 |         node_state = NODE_ACTIVE;
43 |         // TODO: This .Get().input() is highly specific to protobuf, should
44 |         // generalise it somehow.
45 |         for (auto const& input : nodes.Get(node_idx).input())
46 |         {
47 |             if (!node_map.count(input))
48 |             {
49 |                 // Input node not found in graph!
50 |                 // cerr << "ERROR: Input node not found in graph: "
51 |                 //      << input << endl;
52 |                 // return false;
53 |                 continue; // Skip missing input edges
54 |             }
55 |             size_t input_node_idx = node_map.at(input);
56 |             if (!get_post_order(input_node_idx, nodes, node_map, node_states, order))
57 |             {
58 |                 return false;
59 |             }
60 |         }
61 |         node_state = NODE_VISITED;
62 |         order->push_back(node_idx);
63 |     }
64 |     return true;
65 | }
66 | 
67 | } // anonymous namespace
68 | 
69 | template <class Container>
70 | bool toposort(Container const& nodes, std::vector<size_t>* order)
71 | {
72 |     std::unordered_map<std::string, size_t> node_map;
73 |     for (size_t i = 0; i < (size_t) nodes.size(); ++i)
74 |     {
75 |         // TODO: This .Get().input() is highly specific to protobuf, should
76 |         // generalise it somehow.
77 |         for (auto const& output : nodes.Get(i).output())
78 |         {
79 |             if (!node_map.emplace(output, i).second)
80 |             {
81 |                 // Output name appears more than once in graph!
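                // (Added illustrative note: two nodes that both list, say, a tensor named conv_out
                //  as an output would make the output-to-producer map ambiguous, so the sort is rejected.)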
82 | cerr << "ERROR: Output name is not unique: " << output << endl; 83 | return false; 84 | } 85 | } 86 | } 87 | order->reserve(nodes.size()); 88 | std::vector node_states(nodes.size(), NODE_UNVISITED); 89 | for (size_t i = 0; i < (size_t) nodes.size(); ++i) 90 | { 91 | if (!get_post_order(i, nodes, node_map, &node_states, order)) 92 | { 93 | return false; 94 | } 95 | } 96 | return true; 97 | } 98 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/trt_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "Status.hpp" 8 | #include "TensorOrWeights.hpp" 9 | #include "onnx2trt.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace onnx2trt 17 | { 18 | 19 | inline int getDtypeSize(nvinfer1::DataType trtDtype) 20 | { 21 | switch (trtDtype) 22 | { 23 | case nvinfer1::DataType::kFLOAT: return 4; 24 | case nvinfer1::DataType::kINT8: return 1; 25 | case nvinfer1::DataType::kHALF: return 2; 26 | case nvinfer1::DataType::kINT32: 27 | return 4; 28 | // TRT does not support booleans as a native type, so we treat them like int32 values. 29 | case nvinfer1::DataType::kBOOL: 30 | return 4; 31 | // TODO: Some sort of error handling 32 | default: return -1; 33 | } 34 | } 35 | 36 | inline nvinfer1::Dims insert_dim(nvinfer1::Dims const& dims, int idx, int value) 37 | { 38 | assert(idx < dims.nbDims + 1); 39 | nvinfer1::Dims new_dims; 40 | new_dims.nbDims = dims.nbDims + 1; 41 | for (int i = 0; i < idx; ++i) 42 | { 43 | new_dims.d[i] = dims.d[i]; 44 | } 45 | new_dims.d[idx] = value; 46 | for (int i = idx + 1; i < new_dims.nbDims; ++i) 47 | { 48 | new_dims.d[i] = dims.d[i - 1]; 49 | } 50 | return new_dims; 51 | } 52 | 53 | inline nvinfer1::Dims remove_dim(nvinfer1::Dims const& dims, int idx) 54 | { 55 | assert(idx < dims.nbDims); 56 | nvinfer1::Dims new_dims; 57 | new_dims.nbDims = dims.nbDims - 1; 58 | for (int i = 0; i < idx; ++i) 59 | { 60 | new_dims.d[i] = dims.d[i]; 61 | } 62 | for (int i = idx; i < new_dims.nbDims; ++i) 63 | { 64 | new_dims.d[i] = dims.d[i + 1]; 65 | } 66 | // Special case for scalar result (i.e., there was only one dim originally) 67 | if (new_dims.nbDims == 0) 68 | { 69 | new_dims.nbDims = 1; 70 | new_dims.d[0] = 1; 71 | } 72 | return new_dims; 73 | } 74 | 75 | // Adds unitary dimensions on the left 76 | inline nvinfer1::Dims expand_dims(nvinfer1::Dims const& dims, int ndim_new) 77 | { 78 | assert(dims.nbDims <= ndim_new); 79 | nvinfer1::Dims new_dims; 80 | new_dims.nbDims = ndim_new; 81 | int j = 0; 82 | for (; j < ndim_new - dims.nbDims; ++j) 83 | { 84 | new_dims.d[j] = 1; 85 | } 86 | for (int i = 0; i < dims.nbDims; ++i, ++j) 87 | { 88 | new_dims.d[j] = dims.d[i]; 89 | } 90 | return new_dims; 91 | } 92 | 93 | inline nvinfer1::Permutation remove_first_dim(nvinfer1::Permutation const& perm) 94 | { 95 | assert(perm.order[0] == 0); 96 | nvinfer1::Permutation new_perm; 97 | int ndim = nvinfer1::Dims::MAX_DIMS; 98 | for (int i = 0; i < ndim - 1; ++i) 99 | { 100 | new_perm.order[i] = perm.order[i + 1] - 1; 101 | } 102 | return new_perm; 103 | } 104 | 105 | inline nvinfer1::Dims squeeze_trailing_dims(nvinfer1::Dims const& dims) 106 | { 107 | nvinfer1::Dims new_dims = dims; 108 | // Note: TRT requires at least one dimension, so we don't squeeze [1]->[] 109 | while (new_dims.nbDims > 1 && new_dims.d[new_dims.nbDims - 1] == 1) 110 | { 111 | --new_dims.nbDims; 112 | } 113 | 
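    // Added illustrative note (not in the original source): {2, 3, 1, 1} squeezes to {2, 3};
    // {1, 1} squeezes to {1}, since at least one dimension is always kept.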
return new_dims; 114 | } 115 | 116 | inline nvinfer1::Dims squeeze_leading_dims(const nvinfer1::Dims& dims) 117 | { 118 | nvinfer1::Dims newDims; 119 | // Copy dims only if a non-1 has been seen already. 120 | bool non1Seen{false}; 121 | newDims.nbDims = std::copy_if(dims.d, dims.d + dims.nbDims, newDims.d, 122 | [&non1Seen](int x) { 123 | non1Seen = (x != 1) ? true : non1Seen; 124 | return non1Seen; 125 | }) 126 | - newDims.d; 127 | return newDims; 128 | } 129 | 130 | inline nvinfer1::DimsHW operator-(nvinfer1::DimsHW dims) 131 | { 132 | return nvinfer1::DimsHW(-dims.h(), -dims.w()); 133 | } 134 | 135 | // Note: These are used for checking beg_padding == end_padding 136 | inline bool operator==(nvinfer1::Dims const& a, nvinfer1::Dims const& b) 137 | { 138 | if (a.nbDims != b.nbDims) 139 | { 140 | return false; 141 | } 142 | for (int i = 0; i < a.nbDims; ++i) 143 | { 144 | if (a.d[i] != b.d[i]) 145 | { 146 | return false; 147 | } 148 | } 149 | return true; 150 | } 151 | inline bool operator!=(nvinfer1::Dims const& a, nvinfer1::Dims const& b) 152 | { 153 | return !(a == b); 154 | } 155 | 156 | inline TensorOrWeights identity(IImporterContext* ctx, TensorOrWeights input) 157 | { 158 | if (input.is_weights()) 159 | { 160 | return input; 161 | } 162 | else 163 | { 164 | auto* layer = ctx->network()->addIdentity(input.tensor()); 165 | if (!layer) 166 | { 167 | return nullptr; 168 | } 169 | return layer->getOutput(0); 170 | } 171 | } 172 | 173 | inline ::onnx::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataType dt) 174 | { 175 | switch (dt) 176 | { 177 | case nvinfer1::DataType::kFLOAT: return ::onnx::TensorProto::FLOAT; 178 | case nvinfer1::DataType::kHALF: return ::onnx::TensorProto::FLOAT16; 179 | case nvinfer1::DataType::kINT32: return ::onnx::TensorProto::INT32; 180 | case nvinfer1::DataType::kINT8: return ::onnx::TensorProto::INT8; 181 | case nvinfer1::DataType::kBOOL: return ::onnx::TensorProto::BOOL; 182 | default: return ::onnx::TensorProto_DataType_UNDEFINED; 183 | } 184 | throw std::runtime_error{"Unreachable"}; 185 | } 186 | 187 | } // namespace onnx2trt 188 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | template 10 | using string_map = std::unordered_map; 11 | -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/onnxplugin.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "onnxplugin.hpp" 3 | #include 4 | 5 | using namespace nvinfer1; 6 | using namespace std; 7 | 8 | namespace ONNXPlugin { 9 | 10 | GTensor::GTensor(float* ptr, int ndims, int* dims) { 11 | this->ptr_ = ptr; 12 | this->shape_.insert(shape_.end(), dims, dims + ndims); 13 | this->dtype_ = TRT::DataType::Float; 14 | } 15 | 16 | int GTensor::offset_array(size_t size, const int* index_array) const{ 17 | 18 | Assert(size <= shape_.size()); 19 | int value = 0; 20 | for(int i = 0; i < shape_.size(); ++i){ 21 | 22 | if(i < size) 23 | value += index_array[i]; 24 | 25 | if(i + 1 < shape_.size()) 26 | value *= shape_[i+1]; 27 | } 28 | return value; 29 | } 30 | 31 | int GTensor::offset_array(const std::vector& index) const{ 32 | return offset_array(index.size(), index.data()); 33 | } 34 | 35 | GTensor::GTensor(TRT::float16* ptr, int ndims, int* dims) { 36 | this->ptr_ = 
ptr; 37 | this->shape_.insert(shape_.end(), dims, dims + ndims); 38 | this->dtype_ = TRT::DataType::Float16; 39 | } 40 | 41 | GTensor::GTensor(const TRT::Tensor& tensor) { 42 | this->ptr_ = (float*)tensor.gpu(); 43 | this->shape_ = tensor.dims(); 44 | this->dtype_ = TRT::DataType::Float; 45 | } 46 | 47 | int GTensor::count(int start_axis) const { 48 | if(start_axis >= 0 && start_axis < shape_.size()){ 49 | int size = 1; 50 | for (int i = start_axis; i < shape_.size(); ++i) 51 | size *= shape_[i]; 52 | return size; 53 | }else{ 54 | return 0; 55 | } 56 | } 57 | 58 | /////////////////////////////////// 59 | LayerConfig::LayerConfig() { 60 | support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 61 | support_plugin_format_set_ = {nvinfer1::PluginFormat::kLINEAR}; 62 | usage_dtype_ = TRT::DataType::Float; 63 | usage_plugin_format_ = nvinfer1::PluginFormat::kLINEAR; 64 | } 65 | 66 | void LayerConfig::serialize_data_copy_to(void* buffer) { 67 | if (!serialize_data_.empty()) 68 | memcpy(buffer, &serialize_data_[0], serialize_data_.size()); 69 | } 70 | 71 | int LayerConfig::serialize() { 72 | 73 | Plugin::BinIO out; 74 | out << workspace_size_; 75 | out << usage_dtype_; 76 | out << max_batch_size_; 77 | out << usage_plugin_format_; 78 | out << info_; 79 | 80 | out << (int)weights_.size(); 81 | for (int i = 0; i < weights_.size(); ++i) { 82 | 83 | if (usage_dtype_ == TRT::DataType::Float) { 84 | weights_[i]->to_float(); 85 | } 86 | else if (usage_dtype_ == TRT::DataType::Float16) { 87 | weights_[i]->to_half(); 88 | } 89 | else{ 90 | INFOE("unsupport datatype: %d", (int)usage_dtype_); 91 | } 92 | 93 | out << weights_[i]->dims(); 94 | out << weights_[i]->type(); 95 | out.write((char*)weights_[i]->cpu(), weights_[i]->bytes()); 96 | } 97 | 98 | seril(out); 99 | serialize_data_ = out.writedMemory(); 100 | return serialize_data_.size(); 101 | } 102 | 103 | void LayerConfig::deserialize(const void* ptr, size_t length) { 104 | 105 | Plugin::BinIO in(ptr, length); 106 | in >> workspace_size_; 107 | in >> usage_dtype_; 108 | in >> max_batch_size_; 109 | in >> usage_plugin_format_; 110 | in >> info_; 111 | 112 | int nbWeights = 0; 113 | in >> nbWeights; 114 | 115 | weights_.resize(nbWeights); 116 | for (int i = 0; i < nbWeights; ++i) { 117 | std::vector dims; 118 | in >> dims; 119 | 120 | TRT::DataType dt; 121 | in >> dt; 122 | 123 | weights_[i].reset(new TRT::Tensor(dims, dt)); 124 | in.read(weights_[i]->cpu(), weights_[i]->bytes()); 125 | weights_[i]->gpu(); 126 | } 127 | deseril(in); 128 | } 129 | 130 | void LayerConfig::setup(const std::string& info, const std::vector>& weights) { 131 | 132 | this->info_ = info; 133 | this->weights_ = weights; 134 | } 135 | 136 | /////////////////////////////////////////////////////////////////////////////////// 137 | 138 | static TRT::DataType convert_trt_datatype(nvinfer1::DataType dt){ 139 | switch(dt){ 140 | case nvinfer1::DataType::kFLOAT: return TRT::DataType::Float; 141 | case nvinfer1::DataType::kHALF: return TRT::DataType::Float16; 142 | default: 143 | INFOE("Unsupport data type %d", dt); 144 | return TRT::DataType::Float; 145 | } 146 | } 147 | 148 | TRTPlugin::~TRTPlugin() { 149 | } 150 | 151 | void TRTPlugin::pluginInit(const std::string& name, const std::string& info, const std::vector>& weights) { 152 | phase_ = CompilePhase; 153 | layerName_ = name; 154 | config_ = this->new_config(); 155 | Assert(config_ != nullptr); 156 | config_->setup(info, weights); 157 | config_->init(); 158 | } 159 | 160 | void TRTPlugin::pluginInit(const std::string& name, const void* 
serialData, size_t serialLength) { 161 | phase_ = InferencePhase; 162 | layerName_ = name; 163 | config_ = this->new_config(); 164 | Assert(config_ != nullptr); 165 | config_->deserialize(serialData, serialLength); 166 | config_->init(); 167 | } 168 | 169 | std::shared_ptr TRTPlugin::new_config() { 170 | return std::shared_ptr(new LayerConfig()); 171 | } 172 | 173 | int TRTPlugin::getNbOutputs() const noexcept{ 174 | return config_->num_output_; 175 | } 176 | 177 | void TRTPlugin::configurePlugin( 178 | const nvinfer1::DynamicPluginTensorDesc* in, int32_t nbInputs, 179 | const nvinfer1::DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept{ 180 | 181 | auto type = in->desc.type; 182 | auto format = in->desc.format; 183 | this->config_->usage_dtype_ = convert_trt_datatype(type); 184 | this->config_->usage_plugin_format_ = format; 185 | this->config_->num_input_ = nbInputs; 186 | this->config_->max_batch_size_ = in->max.d[0]; 187 | this->config_finish(); 188 | } 189 | 190 | int TRTPlugin::initialize() noexcept{ 191 | return 0; 192 | } 193 | 194 | void TRTPlugin::terminate() noexcept{ 195 | } 196 | 197 | bool TRTPlugin::supportsFormatCombination( 198 | int32_t pos, const nvinfer1::PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept{ 199 | 200 | bool match = config_->support_dtype_set_.find(inOut[pos].type) != config_->support_dtype_set_.end() && 201 | config_->support_plugin_format_set_.find(inOut[pos].format) != config_->support_plugin_format_set_.end(); 202 | return match; 203 | } 204 | 205 | size_t TRTPlugin::getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs, 206 | int32_t nbOutputs) const noexcept{ 207 | return config_->workspace_size_; 208 | } 209 | 210 | int32_t TRTPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 211 | const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept{ 212 | 213 | if (inputTensors_.empty()) { 214 | inputTensors_.resize(config_->num_input_); 215 | outputTensors_.resize(config_->num_output_); 216 | weightTensors_.resize(config_->weights_.size()); 217 | 218 | for (int i = 0; i < weightTensors_.size(); ++i) { 219 | auto& w = config_->weights_[i]; 220 | weightTensors_[i].shape_ = w->dims(); 221 | weightTensors_[i].ptr_ = w->gpu(); 222 | weightTensors_[i].dtype_ = w->type(); 223 | } 224 | } 225 | 226 | for (int i = 0; i < inputTensors_.size(); ++i) { 227 | inputTensors_[i].shape_ = std::vector(inputDesc[i].dims.d, inputDesc[i].dims.d+inputDesc[i].dims.nbDims); 228 | inputTensors_[i].ptr_ = (void*)inputs[i]; 229 | inputTensors_[i].dtype_ = convert_trt_datatype(inputDesc[i].type); 230 | } 231 | 232 | for (int i = 0; i < outputTensors_.size(); ++i) { 233 | outputTensors_[i].shape_ = std::vector(outputDesc[i].dims.d, outputDesc[i].dims.d+outputDesc[i].dims.nbDims); 234 | outputTensors_[i].ptr_ = outputs[i]; 235 | outputTensors_[i].dtype_ = convert_trt_datatype(outputDesc[i].type); 236 | } 237 | return enqueue(inputTensors_, outputTensors_, weightTensors_, workspace, stream); 238 | } 239 | 240 | size_t TRTPlugin::getSerializationSize() const noexcept{ 241 | return config_->serialize(); 242 | } 243 | 244 | void TRTPlugin::serialize(void* buffer) const noexcept{ 245 | config_->serialize_data_copy_to(buffer); 246 | } 247 | };// namespace Plugin -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/onnxplugin.hpp: 
-------------------------------------------------------------------------------- 1 | 2 | #ifndef ONNX_PLUGIN_HPP 3 | #define ONNX_PLUGIN_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include "plugin_binary_io.hpp" 16 | 17 | namespace ONNXPlugin { 18 | 19 | enum Phase { 20 | CompilePhase, 21 | InferencePhase 22 | }; 23 | 24 | struct GTensor { 25 | GTensor() {} 26 | GTensor(const TRT::Tensor& tensor); 27 | GTensor(float* ptr, int ndims, int* dims); 28 | GTensor(TRT::float16* ptr, int ndims, int* dims); 29 | 30 | int count(int start_axis = 0) const; 31 | 32 | template 33 | int offset(int index, _Args&& ... index_args) const{ 34 | const int index_array[] = {index, index_args...}; 35 | return offset_array(sizeof...(index_args) + 1, index_array); 36 | } 37 | 38 | int offset_array(const std::vector& index) const; 39 | int offset_array(size_t size, const int* index_array) const; 40 | 41 | inline int batch() const{return shape_[0];} 42 | inline int channel() const{return shape_[1];} 43 | inline int height() const{return shape_[2];} 44 | inline int width() const{return shape_[3];} 45 | 46 | template 47 | inline _T* ptr() const { return (_T*)ptr_; } 48 | 49 | template 50 | inline _T* ptr(int i, _Args&& ... args) const { return (_T*)ptr_ + offset(i, args...); } 51 | 52 | void* ptr_ = nullptr; 53 | TRT::DataType dtype_ = TRT::DataType::Float; 54 | std::vector shape_; 55 | }; 56 | 57 | struct LayerConfig { 58 | 59 | /////////////////////////////////// 60 | int num_output_ = 1; 61 | int num_input_ = 1; 62 | size_t workspace_size_ = 0; 63 | int max_batch_size_ = 0; 64 | std::set support_dtype_set_; 65 | std::set support_plugin_format_set_; 66 | 67 | std::vector> weights_; 68 | TRT::DataType usage_dtype_; 69 | nvinfer1::PluginFormat usage_plugin_format_; 70 | std::string info_; 71 | 72 | /////////////////////////////////// 73 | std::string serialize_data_; 74 | 75 | LayerConfig(); 76 | void serialize_data_copy_to(void* buffer); 77 | int serialize(); 78 | void deserialize(const void* ptr, size_t length); 79 | void setup(const std::string& info, const std::vector>& weights); 80 | virtual void seril(Plugin::BinIO& out) {} 81 | virtual void deseril(Plugin::BinIO& in) {} 82 | virtual void init(){} 83 | }; 84 | 85 | #define SetupPlugin(class_) \ 86 | virtual const char* getPluginType() const noexcept override{return #class_;}; \ 87 | virtual const char* getPluginVersion() const noexcept override{return "1";}; \ 88 | virtual nvinfer1::IPluginV2DynamicExt* clone() const noexcept override{return new class_(*this);} 89 | 90 | #define RegisterPlugin(class_) \ 91 | class class_##PluginCreator__ : public nvinfer1::IPluginCreator{ \ 92 | public: \ 93 | const char* getPluginName() const noexcept override{return #class_;} \ 94 | const char* getPluginVersion() const noexcept override{return "1";} \ 95 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override{return &mFieldCollection;} \ 96 | \ 97 | nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override{ \ 98 | auto plugin = new class_(); \ 99 | mFieldCollection = *fc; \ 100 | mPluginName = name; \ 101 | return plugin; \ 102 | } \ 103 | \ 104 | nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override{ \ 105 | auto plugin = new class_(); \ 106 | plugin->pluginInit(name, serialData, serialLength); \ 107 | mPluginName = name; \ 108 | 
return plugin; \ 109 | } \ 110 | \ 111 | void setPluginNamespace(const char* libNamespace) noexcept override{mNamespace = libNamespace;} \ 112 | const char* getPluginNamespace() const noexcept override{return mNamespace.c_str();} \ 113 | \ 114 | private: \ 115 | std::string mNamespace; \ 116 | std::string mPluginName; \ 117 | nvinfer1::PluginFieldCollection mFieldCollection{0, nullptr}; \ 118 | }; \ 119 | REGISTER_TENSORRT_PLUGIN(class_##PluginCreator__); 120 | 121 | class TRTPlugin : public nvinfer1::IPluginV2DynamicExt { 122 | public: 123 | virtual nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept override{return inputTypes[0];} 124 | 125 | virtual void configurePlugin( 126 | const nvinfer1::DynamicPluginTensorDesc* in, int32_t nbInputs, 127 | const nvinfer1::DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept override; 128 | 129 | virtual void attachToContext(cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept override {} 130 | virtual void detachFromContext() noexcept override {} 131 | virtual void setPluginNamespace(const char* pluginNamespace) noexcept override{this->namespace_ = pluginNamespace;}; 132 | virtual const char* getPluginNamespace() const noexcept override{return this->namespace_.data();}; 133 | 134 | virtual ~TRTPlugin(); 135 | virtual int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) = 0; 136 | 137 | void pluginInit(const std::string& name, const std::string& info, const std::vector>& weights); 138 | void pluginInit(const std::string& name, const void* serialData, size_t serialLength); 139 | virtual void config_finish() {}; 140 | 141 | virtual std::shared_ptr new_config(); 142 | virtual bool supportsFormatCombination( 143 | int32_t pos, const nvinfer1::PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; 144 | 145 | virtual int getNbOutputs() const noexcept; 146 | virtual nvinfer1::DimsExprs getOutputDimensions( 147 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept = 0; 148 | 149 | virtual int initialize() noexcept; 150 | virtual void terminate() noexcept; 151 | virtual void destroy() noexcept override{} 152 | virtual size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs, 153 | int32_t nbOutputs) const noexcept override; 154 | 155 | virtual int32_t enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 156 | const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; 157 | 158 | virtual size_t getSerializationSize() const noexcept override; 159 | virtual void serialize(void* buffer) const noexcept override; 160 | 161 | protected: 162 | std::string namespace_; 163 | std::string layerName_; 164 | Phase phase_ = CompilePhase; 165 | std::shared_ptr config_; 166 | std::vector inputTensors_; 167 | std::vector outputTensors_; 168 | std::vector weightTensors_; 169 | }; 170 | 171 | }; //namespace Plugin 172 | 173 | #endif //ONNX_PLUGIN_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugin_binary_io.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "plugin_binary_io.hpp" 3 | #include "ilogger.hpp" 4 | #include 5 | 6 
| namespace Plugin{ 7 | 8 | using namespace std; 9 | 10 | BinIO::~BinIO(){ 11 | close(); 12 | } 13 | 14 | bool BinIO::opened(){ 15 | if (flag_ == MemoryRead) 16 | return memoryRead_ != nullptr; 17 | else if (flag_ == MemoryWrite) 18 | return true; 19 | return false; 20 | } 21 | 22 | void BinIO::close(){ 23 | if (flag_ == MemoryRead) { 24 | memoryRead_ = nullptr; 25 | memoryCursor_ = 0; 26 | memoryLength_ = -1; 27 | } 28 | else if (flag_ == MemoryWrite) { 29 | memoryWrite_.clear(); 30 | memoryCursor_ = 0; 31 | memoryLength_ = -1; 32 | } 33 | } 34 | 35 | string BinIO::readData(int numBytes){ 36 | string output; 37 | output.resize(numBytes); 38 | 39 | int readlen = read((void*)output.data(), output.size()); 40 | output.resize(readlen); 41 | return output; 42 | } 43 | 44 | int BinIO::read(void* pdata, size_t length){ 45 | 46 | if (flag_ == MemoryRead) { 47 | if (memoryLength_ != -1) { 48 | 49 | if (memoryLength_ < memoryCursor_ + length) { 50 | int remain = memoryLength_ - memoryCursor_; 51 | if (remain > 0) { 52 | memcpy(pdata, memoryRead_ + memoryCursor_, remain); 53 | memoryCursor_ += remain; 54 | return remain; 55 | } 56 | else { 57 | return -1; 58 | } 59 | } 60 | } 61 | memcpy(pdata, memoryRead_ + memoryCursor_, length); 62 | memoryCursor_ += length; 63 | return length; 64 | } 65 | else { 66 | return -1; 67 | } 68 | } 69 | 70 | bool BinIO::eof(){ 71 | if (!opened()) return true; 72 | 73 | if (flag_ == MemoryRead){ 74 | return this->memoryCursor_ >= this->memoryLength_; 75 | } 76 | else if (flag_ == MemoryWrite){ 77 | return false; 78 | } 79 | else { 80 | opstate_ = false; 81 | INFO("Unsupport flag: %d", flag_); 82 | return true; 83 | } 84 | } 85 | 86 | int BinIO::write(const void* pdata, size_t length){ 87 | 88 | if (flag_ == MemoryWrite) { 89 | memoryWrite_.append((char*)pdata, (char*)pdata + length); 90 | return length; 91 | } 92 | else { 93 | return -1; 94 | } 95 | } 96 | 97 | int BinIO::writeData(const string& data){ 98 | return write(data.data(), data.size()); 99 | } 100 | 101 | BinIO& BinIO::operator >> (string& value){ 102 | //read 103 | int length = 0; 104 | (*this) >> length; 105 | value = readData(length); 106 | return *this; 107 | } 108 | 109 | int BinIO::readInt(){ 110 | int value = 0; 111 | (*this) >> value; 112 | return value; 113 | } 114 | 115 | float BinIO::readFloat(){ 116 | float value = 0; 117 | (*this) >> value; 118 | return value; 119 | } 120 | 121 | BinIO& BinIO::operator << (const string& value){ 122 | //write 123 | (*this) << (int)value.size(); 124 | writeData(value); 125 | return *this; 126 | } 127 | 128 | BinIO& BinIO::operator << (const char* value){ 129 | 130 | int length = strlen(value); 131 | (*this) << (int)length; 132 | write(value, length); 133 | return *this; 134 | } 135 | 136 | BinIO& BinIO::operator << (const vector& value){ 137 | (*this) << (int)value.size(); 138 | for (int i = 0; i < value.size(); ++i){ 139 | (*this) << value[i]; 140 | } 141 | return *this; 142 | } 143 | 144 | BinIO& BinIO::operator >> (vector& value){ 145 | int num; 146 | (*this) >> num; 147 | 148 | value.resize(num); 149 | for (int i = 0; i < value.size(); ++i) 150 | (*this) >> value[i]; 151 | return *this; 152 | } 153 | 154 | bool BinIO::openMemoryRead(const void* ptr, int memoryLength) { 155 | close(); 156 | 157 | if (!ptr) return false; 158 | memoryRead_ = (const char*)ptr; 159 | memoryCursor_ = 0; 160 | memoryLength_ = memoryLength; 161 | flag_ = MemoryRead; 162 | return true; 163 | } 164 | 165 | void BinIO::openMemoryWrite() { 166 | close(); 167 | 168 | memoryWrite_.clear(); 
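// Write mode always starts from an empty buffer: write() appends raw bytes to memoryWrite_, and the
// accumulated buffer is later handed back through writedMemory() for plugin serialization. The cursor
// and length fields below only matter in read mode and are reset here for consistency.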
169 | memoryCursor_ = 0; 170 | memoryLength_ = -1; 171 | flag_ = MemoryWrite; 172 | } 173 | 174 | }; // namespace Plugin -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugin_binary_io.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PLUGIN_BINARY_IO_HPP 2 | #define PLUGIN_BINARY_IO_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace Plugin{ 8 | 9 | class BinIO { 10 | public: 11 | enum Head { 12 | MemoryRead = 1, 13 | MemoryWrite = 2 14 | }; 15 | 16 | BinIO() { openMemoryWrite(); } 17 | BinIO(const void* ptr, int memoryLength = -1) { openMemoryRead(ptr, memoryLength); } 18 | virtual ~BinIO(); 19 | bool opened(); 20 | bool openMemoryRead(const void* ptr, int memoryLength = -1); 21 | void openMemoryWrite(); 22 | const std::string& writedMemory() { return memoryWrite_; } 23 | void close(); 24 | int write(const void* pdata, size_t length); 25 | int writeData(const std::string& data); 26 | int read(void* pdata, size_t length); 27 | std::string readData(int numBytes); 28 | int readInt(); 29 | float readFloat(); 30 | bool eof(); 31 | 32 | BinIO& operator >> (std::string& value); 33 | BinIO& operator << (const std::string& value); 34 | BinIO& operator << (const char* value); 35 | BinIO& operator << (const std::vector& value); 36 | BinIO& operator >> (std::vector& value); 37 | 38 | template 39 | BinIO& operator >> (std::vector<_T>& value) { 40 | int length = 0; 41 | (*this) >> length; 42 | 43 | value.resize(length); 44 | read(value.data(), length * sizeof(_T)); 45 | return *this; 46 | } 47 | 48 | template 49 | BinIO& operator << (const std::vector<_T>& value) { 50 | (*this) << (int)value.size(); 51 | write(value.data(), sizeof(_T) * value.size()); 52 | return *this; 53 | } 54 | 55 | template 56 | BinIO& operator >> (_T& value) { 57 | read(&value, sizeof(_T)); 58 | return *this; 59 | } 60 | 61 | template 62 | BinIO& operator << (const _T& value) { 63 | write(&value, sizeof(_T)); 64 | return *this; 65 | } 66 | 67 | bool opstate() const { 68 | return opstate_; 69 | } 70 | 71 | private: 72 | size_t readModeEndSEEK_ = 0; 73 | std::string memoryWrite_; 74 | const char* memoryRead_ = nullptr; 75 | int memoryCursor_ = 0; 76 | int memoryLength_ = -1; 77 | Head flag_ = MemoryWrite; 78 | bool opstate_ = true; 79 | }; 80 | }; // namespace Plugin 81 | 82 | #endif //PLUGIN_BINARY_IO_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugins/Project2Dto3D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include 8 | 9 | // #include 10 | enum class DeviceType: int32_t { 11 | kHOST = 0, 12 | kGPU = 1 13 | }; // emum class DeviceType 14 | 15 | using namespace ONNXPlugin; 16 | 17 | 18 | 19 | 20 | __global__ void build_LUT_kernel(int32_t n_x_voxels, int32_t n_y_voxels, int32_t n_z_voxels, 21 | float* voxel_size, float* origin, float* projection, 22 | int32_t* LUT, 23 | int32_t n_images, int32_t height, int32_t width) { 24 | 25 | int32_t idx = blockIdx.x * blockDim.x + threadIdx.x; 26 | int32_t zi = idx % n_z_voxels; 27 | idx /= n_z_voxels; 28 | int32_t yi = idx % n_y_voxels; 29 | idx /= n_y_voxels; 30 | int32_t xi = idx % n_x_voxels; 31 | idx /= n_x_voxels; 32 | int32_t img = idx; 33 | // printf("Current line: %d\n", __LINE__); 34 | if (img < n_images && LUT[(xi * n_y_voxels + yi) * n_z_voxels + zi] == -1 ) { 35 | // printf("Current line: %d\n", __LINE__); 36 | 37 
| float size_x = voxel_size[0]; 38 | // printf("Current line: %d\n", __LINE__); 39 | 40 | float size_y = voxel_size[1]; 41 | float size_z = voxel_size[2]; 42 | // printf("Current line: %d\n", __LINE__); 43 | 44 | float ar[3]; 45 | float pt[3]; 46 | // printf("Current line: %d\n", __LINE__); 47 | pt[0] = (xi - n_x_voxels / 2.0f) * size_x + origin[0]; 48 | pt[1] = (yi - n_y_voxels / 2.0f) * size_y + origin[1]; 49 | pt[2] = (zi - n_z_voxels / 2.0f) * size_z + origin[2]; 50 | 51 | // printf("Current line: %d\n", __LINE__); 52 | for (int i = 0; i < 3; ++i) { 53 | ar[i] = 0; 54 | for (int j = 0; j < 3; ++j) { 55 | ar[i] += projection[(img * 3 + i) * 4 + j] * pt[j]; 56 | } 57 | ar[i] += projection[((img * 3) + i) * 4 + 3]; 58 | } 59 | // printf("Current line: %d\n", __LINE__); 60 | int32_t x = round(ar[0] / ar[2]); 61 | int32_t y = round(ar[1] / ar[2]); 62 | float z = ar[2]; 63 | 64 | // printf("Current line: %d\n", __LINE__); 65 | bool fit_in = (x >= 0) && (y >= 0) && (x < width) && (y < height) && (z > 0); 66 | int32_t target; 67 | if (fit_in) { 68 | target = (img * height + y) * width + x; 69 | 70 | int offset = (xi * n_y_voxels + yi) * n_z_voxels + zi; // [xi,yi,zi] 71 | LUT[offset] = target; 72 | 73 | // valid[offset] = fit_in; 74 | // printf("Current line: %d\n", __LINE__); 75 | 76 | } 77 | else { 78 | target = -1; 79 | int offset = (xi * n_y_voxels + yi) * n_z_voxels + zi; // [xi,yi,zi] 80 | LUT[offset] = target; 81 | 82 | } 83 | 84 | // printf("Current line: %d\n", __LINE__); 85 | } 86 | 87 | } 88 | 89 | __global__ void backproject_LUT_kernel(float* features, int32_t* LUT, float* volume, 90 | size_t total_nrof_voxels, int32_t n_channels) { 91 | int32_t offset = blockIdx.x * blockDim.x + threadIdx.x; 92 | size_t nrof_float4_copies_per_iter = n_channels / 4; // We assume n_channels % 4 == 0 93 | if (offset < total_nrof_voxels) { 94 | int32_t target = LUT[offset]; 95 | if (target >= 0) { 96 | float4* src = (float4*)(features + target * n_channels); 97 | float4* dst = (float4*)(volume + offset * n_channels); 98 | for (size_t i = 0; i < nrof_float4_copies_per_iter; ++i) { 99 | dst[i] = src[i]; 100 | } 101 | } 102 | } 103 | } 104 | 105 | void backproject_LUT_CUDA(float* features_dev, int32_t* LUT_dev, float* volume_dev, 106 | int32_t n_images, int32_t n_channels, 107 | float* n_voxels) { 108 | // int32_t n_x_voxels = int32_t(n_voxels[0]); 109 | // int32_t n_y_voxels = int32_t(n_voxels[1]); 110 | // int32_t n_z_voxels = int32_t(n_voxels[2]); 111 | int32_t n_x_voxels = 200; 112 | int32_t n_y_voxels = 200; 113 | int32_t n_z_voxels = 4; 114 | size_t total_nrof_voxels = n_images * n_x_voxels * n_y_voxels * n_z_voxels; 115 | #define BLOCK_SIZE 1024 116 | dim3 thread_per_block(BLOCK_SIZE); 117 | dim3 block_per_grid((total_nrof_voxels + thread_per_block.x - 1) / thread_per_block.x); 118 | backproject_LUT_kernel<<< block_per_grid, thread_per_block >>>(features_dev, LUT_dev, volume_dev, 119 | total_nrof_voxels, n_channels 120 | ); 121 | } 122 | 123 | void backproject_LUT_GPU(float * features, int32_t * LUT, float* volume, 124 | float * n_voxels,int32_t n_images,int32_t n_channels) { 125 | backproject_LUT_CUDA(features, LUT, volume, 126 | n_images, n_channels, 127 | n_voxels 128 | ); 129 | } 130 | 131 | 132 | void build_LUT_cuda(float* n_voxels, float* voxel_size_dev, float* origin_dev, float* projection, 133 | int32_t* LUT, 134 | int32_t n_images, int32_t height, int32_t width) { 135 | // int32_t n_x_voxels = int32_t(n_voxels[0]); 136 | // int32_t n_y_voxels = int32_t(n_voxels[1]); 137 | // int32_t 
n_z_voxels = int32_t(n_voxels[2]); 138 | int32_t n_x_voxels = 200; 139 | int32_t n_y_voxels = 200; 140 | int32_t n_z_voxels = 4; 141 | size_t total_nrof_voxels = n_images * n_x_voxels * n_y_voxels * n_z_voxels; 142 | #define BLOCK_SIZE 1024 143 | dim3 thread_per_block(BLOCK_SIZE); 144 | dim3 block_per_grid((total_nrof_voxels + thread_per_block.x - 1) / thread_per_block.x); 145 | 146 | // printf("build here\n"); 147 | build_LUT_kernel<<< block_per_grid, thread_per_block >>>(n_x_voxels, n_y_voxels, n_z_voxels, 148 | voxel_size_dev, origin_dev, projection, 149 | LUT, 150 | n_images, height, width); 151 | } 152 | 153 | void build_LUT_GPU(float* n_voxels, float* voxel_size, float* origin, 154 | float* projection, int32_t n_images, int32_t height, int32_t width, int32_t n_channels, 155 | int32_t* LUT) { 156 | 157 | build_LUT_cuda(n_voxels, voxel_size, origin, projection, 158 | LUT, 159 | n_images, height, width 160 | ); 161 | 162 | } 163 | 164 | // Initialize the workspace; equivalent to cudaMemset(LUT, -1, lutsize*sizeof(int32_t)), but this version does not raise an error 165 | __global__ void initializeWorkspaceKernel(int32_t* workspace, size_t numElements, int32_t initValue) { 166 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 167 | 168 | if (idx < numElements) { 169 | workspace[idx] = initValue; 170 | } 171 | } 172 | void initializeWorkspace(void* workspace, size_t workspaceSize, cudaStream_t stream) { 173 | int32_t* workspaceData = static_cast<int32_t*>(workspace); 174 | size_t numElements = workspaceSize / sizeof(int32_t); 175 | const int32_t initValue = -1; 176 | 177 | // Use a block size of 1024 threads 178 | const int32_t blockSize = 1024; 179 | const int32_t numBlocks = (numElements + blockSize - 1) / blockSize; 180 | 181 | // Launch the CUDA kernel to initialize the workspace with -1 182 | initializeWorkspaceKernel<<<numBlocks, blockSize, 0, stream>>>(workspaceData, numElements, initValue); 183 | } 184 | 185 | 186 | class Project2Dto3D : public TRTPlugin { 187 | public: 188 | SetupPlugin(Project2Dto3D); 189 | 190 | virtual void config_finish() override{ 191 | } 192 | 193 | virtual std::shared_ptr<LayerConfig> new_config() override{ 194 | auto cfg = TRTPlugin::new_config(); 195 | cfg->support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 196 | 197 | return cfg; 198 | } 199 | 200 | size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs,int32_t nbOutputs) const noexcept 201 | { 202 | int32_t nSlices = outputs[0].dims.d[0] *outputs[0].dims.d[1] *outputs[0].dims.d[2] *outputs[0].dims.d[3]; 203 | printf("nSlices %d \n",nSlices); 204 | return nSlices * sizeof(int32_t); 205 | } 206 | 207 |
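    // Sizing sketch, assuming the fixed 200 x 200 x 4 voxel grid above and 64 feature channels as hinted
    // by getOutputDimensions() below: nSlices = 200*200*4*64 = 10,240,000 entries, so the requested LUT
    // workspace is 10,240,000 * sizeof(int32_t) = 40,960,000 bytes (about 39 MB), which matches the
    // lutsize computed in enqueue().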
208 | nvinfer1::DimsExprs getOutputDimensions( 209 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept{ 210 | nvinfer1::DimsExprs output_dims; 211 | std::vector<int32_t> n_voxels{200, 200, 4}; 212 | output_dims.nbDims = 4; 213 | output_dims.d[0] = exprBuilder.constant(n_voxels[0]); 214 | output_dims.d[1] = exprBuilder.constant(n_voxels[1]); 215 | output_dims.d[2] = exprBuilder.constant(n_voxels[2]); 216 | output_dims.d[3] = inputs[0].d[3]; //64 217 | 218 | return output_dims; 219 | } 220 | 221 | void cal_debug(float *input, int32_t size,int line,cudaStream_t stream,std::string input_name="") { 222 | 223 | std::cout << "================" << std::endl; 224 | std::cout << "Input parameter name: " << input_name << std::endl; 225 | 226 | cudaStreamSynchronize(stream); 227 | float * cal_features; 228 | cal_features = (float*)malloc(size*sizeof(float)); 229 | cudaMemcpy(cal_features, input, size*sizeof(float), cudaMemcpyDeviceToHost); 230 | 231 | std::cout.setf(std::ios::fixed,std::ios::floatfield); 232 | std::cout.precision(4); 233 | auto print_size = size>100?100:size; 234 | for(int i=0;i0? cal_features[i]:-cal_features[i]); // add the current element to the running sum 250 | if (i == 0 || cal_features[i] > max) { // if the current element is greater than the max, or it is the first element 251 | max = cal_features[i]; // update the max 252 | } 253 | if (i == 0 || cal_features[i] < min) { // if the current element is smaller than the min, or it is the first element 254 | min = cal_features[i]; // update the min 255 | } 256 | 257 | // outfile<0? cal_features[i]:-cal_features[i]); // add the current element to the running sum 298 | if (i == 0 || cal_features[i] > max) { // if the current element is greater than the max, or it is the first element 299 | max = cal_features[i]; // update the max 300 | } 301 | if (i == 0 || cal_features[i] < min) { // if the current element is smaller than the min, or it is the first element 302 | min = cal_features[i]; // update the min 303 | } 304 | 305 | // outfile< 322 | virtual int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override{ 323 | // std::chrono::high_resolution_clock::time_point t1, t2; 324 | 325 | ///////////////////////////// 326 | ///////////////////////////// 327 | // Miscellaneous initialization: grab the input features, the packed parameters and the output volume 328 | auto &features_tensor = inputs[0]; 329 | auto &param_tensor = weights[0]; 330 | auto &volume_output = outputs[0]; 331 | 332 | if (config_->usage_dtype_ == TRT::DataType::Float) { 333 | } 334 | else if (config_->usage_dtype_ == TRT::DataType::Float16) { // TODO: FP16 would require extra data conversion here, which is unreasonable; the FP16 plugin path still needs optimization 335 | return 1; 336 | INFOF("not implemented"); 337 | } 338 | 339 | int32_t n_images = features_tensor.shape_[0]; 340 | int32_t height = features_tensor.shape_[1]; 341 | int32_t width = features_tensor.shape_[2]; 342 | int32_t n_channels = features_tensor.shape_[3]; 343 | 344 | float * features = features_tensor.ptr<float>(); 345 | 346 | float * param_ = param_tensor.ptr<float>(); 347 | 348 | float * n_voxels_float = param_; 349 | float * voxel_size_tensor = n_voxels_float + 3; 350 | float * origin_tensor = voxel_size_tensor +3; 351 | float * projection_tensor = origin_tensor + 3; 352 | 353 | 354 | int32_t *LUT = (int32_t *)workspace; 355 | size_t lutsize = volume_output.shape_[0]*volume_output.shape_[1]*volume_output.shape_[2]*volume_output.shape_[3]; 356 | 357 | 358 | ///////////////////////////// 359 | // Initialize the LUT to -1 360 | initializeWorkspace(LUT, lutsize, stream); 361 | 362 | ///////////////////////////// 363 | // Build the LUT mapping table. TODO: to be optimized; the initial idea is to compute it in PyTorch and pass it in 364 | build_LUT_GPU(n_voxels_float, voxel_size_tensor, origin_tensor, projection_tensor, 365 | n_images, height, width, n_channels, LUT); 366 | 367 | 368 | ////////////////////////////////// 369 | // Back-projection 370 | backproject_LUT_GPU(features, LUT, volume_output.ptr<float>(), n_voxels_float,n_images, n_channels); 371 | 372 | 373 | return 0; 374 | } 375 | }; 376 | 377 | RegisterPlugin(Project2Dto3D); --------------------------------------------------------------------------------
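For reference, a minimal host-side sketch of the voxel-to-pixel mapping that build_LUT_kernel evaluates per thread. It assumes the same layouts as the kernel (row-major 3x4 projection matrix per image, LUT target index (img*height + y)*width + x) and is only meant to illustrate the math; the function name is hypothetical and not part of the project.

#include <cmath>
#include <cstdint>

// Illustrative CPU mirror of one build_LUT_kernel thread: returns the flattened feature
// index for voxel (xi, yi, zi) seen by image `img`, or -1 if it falls outside the image.
int32_t project_voxel_to_pixel(int32_t xi, int32_t yi, int32_t zi,
                               int32_t n_x_voxels, int32_t n_y_voxels, int32_t n_z_voxels,
                               const float* voxel_size, const float* origin,
                               const float* projection,   // n_images x 3 x 4, row-major
                               int32_t img, int32_t height, int32_t width) {
    // Voxel center in the reference frame, same formula as the kernel.
    float pt[3] = {
        (xi - n_x_voxels / 2.0f) * voxel_size[0] + origin[0],
        (yi - n_y_voxels / 2.0f) * voxel_size[1] + origin[1],
        (zi - n_z_voxels / 2.0f) * voxel_size[2] + origin[2],
    };

    // ar = P[img] * [pt; 1], with P stored as 3x4 rows.
    float ar[3];
    for (int i = 0; i < 3; ++i) {
        ar[i] = projection[(img * 3 + i) * 4 + 3];        // translation column
        for (int j = 0; j < 3; ++j)
            ar[i] += projection[(img * 3 + i) * 4 + j] * pt[j];
    }

    // Perspective divide and bounds check, matching the kernel's fit_in test.
    int32_t x = (int32_t)std::round(ar[0] / ar[2]);
    int32_t y = (int32_t)std::round(ar[1] / ar[2]);
    bool fit_in = x >= 0 && y >= 0 && x < width && y < height && ar[2] > 0.0f;
    return fit_in ? (img * height + y) * width + x : -1;
}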
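And a minimal round-trip sketch of how LayerConfig uses Plugin::BinIO above: values streamed with operator<< into the in-memory write buffer come back in the same order with operator>> when the engine is deserialized. The variable names and values are illustrative; only BinIO calls that appear in the sources are used.

#include <string>
#include "plugin_binary_io.hpp"

// Illustrative only: mimics the ordering discipline of LayerConfig::serialize()/deserialize().
static std::string pack_example() {
    Plugin::BinIO out;                                 // default ctor opens memory-write mode
    size_t workspace_size = 1024;
    int max_batch = 4;
    std::string info = "{\"n_voxels\":[200,200,4]}";   // hypothetical info string
    out << workspace_size << max_batch << info;        // strings are written as length + bytes
    return out.writedMemory();                         // buffer later copied out by serialize_data_copy_to()
}

static void unpack_example(const std::string& blob) {
    Plugin::BinIO in(blob.data(), (int)blob.size());   // memory-read mode over the serialized bytes
    size_t workspace_size = 0;
    int max_batch = 0;
    std::string info;
    in >> workspace_size >> max_batch >> info;         // must match the write order exactly
}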