├── .gitignore ├── Makefile ├── README.md └── src ├── application ├── app_fastbev.cpp ├── fastbev.cpp ├── fastbev.hpp └── fastbev_decode.cu ├── main.cpp └── tensorRT ├── builder ├── trt_builder.cpp └── trt_builder.hpp ├── common ├── cuda_tools.cpp ├── cuda_tools.hpp ├── ilogger.cpp ├── ilogger.hpp ├── infer_controller.hpp ├── json.cpp ├── json.hpp ├── monopoly_allocator.hpp ├── preprocess_kernel.cu ├── preprocess_kernel.cuh ├── trt_tensor.cpp └── trt_tensor.hpp ├── import_lib.cpp ├── infer ├── trt_infer.cpp └── trt_infer.hpp ├── onnx ├── onnx-ml.pb.cpp ├── onnx-ml.pb.h ├── onnx-operators-ml.pb.cpp ├── onnx-operators-ml.pb.h ├── onnx_pb.h ├── onnxifi.h └── readme.md ├── onnx_parser ├── ImporterContext.hpp ├── LoopHelpers.cpp ├── LoopHelpers.hpp ├── ModelImporter.cpp ├── ModelImporter.hpp ├── NvOnnxParser.cpp ├── NvOnnxParser.h ├── OnnxAttrs.cpp ├── OnnxAttrs.hpp ├── RNNHelpers.cpp ├── RNNHelpers.hpp ├── ShapeTensor.cpp ├── ShapeTensor.hpp ├── ShapedWeights.cpp ├── ShapedWeights.hpp ├── Status.hpp ├── TensorOrWeights.hpp ├── builtin_op_importers.cpp ├── builtin_op_importers.hpp ├── onnx2trt.hpp ├── onnx2trt_common.hpp ├── onnx2trt_runtime.hpp ├── onnx2trt_utils.cpp ├── onnx2trt_utils.hpp ├── onnxErrorRecorder.cpp ├── onnxErrorRecorder.hpp ├── onnx_utils.hpp ├── readme.md ├── toposort.hpp ├── trt_utils.hpp └── utils.hpp └── onnxplugin ├── onnxplugin.cpp ├── onnxplugin.hpp ├── plugin_binary_io.cpp ├── plugin_binary_io.hpp └── plugins └── Project2Dto3D.cu /.gitignore: -------------------------------------------------------------------------------- 1 | workspace/ 2 | build/ 3 | .vscode/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | cpp_srcs := $(shell find src -name "*.cpp") 4 | cpp_objs := $(cpp_srcs:.cpp=.o) 5 | cpp_objs := $(cpp_objs:src/%=build/%) 6 | cpp_mk := $(cpp_objs:.o=.mk) 7 | 8 | cu_srcs := $(shell find src -name "*.cu") 9 | cu_objs := $(cu_srcs:.cu=.cuo) 10 | cu_objs := $(cu_objs:src/%=build/%) 11 | cu_mk := $(cu_objs:.cuo=.cumk) 12 | 13 | # 配置你的库路径 14 | lean_protobuf := /home/wicri/fy/code/cpp_cuda_centernet/protobuf/aarch64/protobuf 15 | lean_tensor_rt := /usr/lib/aarch64-linux-gnu/ 16 | lean_cudnn := /usr/local/cuda 17 | lean_opencv := /usr/include/ 18 | lean_cuda := /usr/local/cuda 19 | use_python := false 20 | python_root := /datav/software/anaconda3 21 | python_name := python3.9 22 | 23 | include_paths := src \ 24 | src/application \ 25 | src/camerasdk \ 26 | src/tensorRT \ 27 | src/tensorRT/common \ 28 | $(lean_protobuf)/include \ 29 | $(lean_opencv)/opencv4/ \ 30 | /usr/include/aarch64-linux-gnu/ \ 31 | $(lean_cuda)/include \ 32 | $(lean_cudnn)/include 33 | 34 | library_paths := $(lean_protobuf)/lib \ 35 | $(lean_opencv)/opencv4/ \ 36 | /usr/include/aarch64-linux-gnu/ \ 37 | $(lean_cuda)/lib64 \ 38 | $(lean_cudnn)/lib 39 | 40 | link_librarys := opencv_core opencv_imgproc opencv_videoio opencv_imgcodecs \ 41 | nvinfer nvinfer_plugin \ 42 | cuda cublas cudart cudnn \ 43 | stdc++ protobuf dl opencv_highgui opencv_ml 44 | 45 | # HAS_PYTHON表示是否编译python支持 46 | support_define := 47 | 48 | ifeq ($(use_python), true) 49 | include_paths += $(lean_python)/include/python3.8 50 | library_paths += $(lean_python)/lib 51 | link_librarys += python3.8 52 | support_define += -DHAS_PYTHON 53 | endif 54 | 55 | paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item)) 56 | include_paths := $(foreach item,$(include_paths),-I$(item)) 57 | library_paths := 
$(foreach item,$(library_paths),-L$(item)) 58 | link_librarys := $(foreach item,$(link_librarys),-l$(item)) 59 | 60 | # 如果是其他显卡,请修改-gencode=arch=compute_75,code=sm_75为对应显卡的能力 61 | cpp_compile_flags := -std=c++11 -fPIC -g -fopenmp -w -O0 $(support_define) 62 | cu_compile_flags := -std=c++11 -m64 -Xcompiler -fPIC -g -w -gencode=arch=compute_72,code=sm_72 -O0 $(support_define) 63 | link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN' 64 | 65 | cpp_compile_flags += $(include_paths) 66 | cu_compile_flags += $(include_paths) 67 | link_flags += $(library_paths) $(link_librarys) $(paths) 68 | 69 | cpp_compile_flags += -I/opt/ros/melodic/include 70 | 71 | ifneq ($(MAKECMDGOALS), clean) 72 | -include $(cpp_mk) $(cu_mk) 73 | endif 74 | 75 | pro : workspace/pro 76 | 77 | workspace/pro : $(cpp_objs) $(cu_objs) 78 | @echo Link $@ 79 | @mkdir -p $(dir $@) 80 | @g++ $^ -o $@ $(link_flags) 81 | 82 | 83 | build/%.o : src/%.cpp 84 | @echo Compile CXX $< 85 | @mkdir -p $(dir $@) 86 | @g++ -c $< -o $@ $(cpp_compile_flags) 87 | 88 | build/%.cuo : src/%.cu 89 | @echo Compile CUDA $< 90 | @mkdir -p $(dir $@) 91 | @nvcc -c $< -o $@ $(cu_compile_flags) 92 | 93 | build/%.mk : src/%.cpp 94 | @echo Compile depends CXX $< 95 | @mkdir -p $(dir $@) 96 | @g++ -M $< -MF $@ -MT $(@:.mk=.o) $(cpp_compile_flags) 97 | 98 | build/%.cumk : src/%.cu 99 | @echo Compile depends CUDA $< 100 | @mkdir -p $(dir $@) 101 | @nvcc -M $< -MF $@ -MT $(@:.cumk=.o) $(cu_compile_flags) 102 | 103 | 104 | bev : workspace/pro 105 | @cd workspace && ./pro fastbev 106 | 107 | clean : 108 | @rm -rf build workspace/pro python/trtpy/libtrtpyc.so python/build python/dist python/trtpy.egg-info python/trtpy/__pycache__ 109 | @rm -rf build 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Detail readme.md is coming soon 3 | 4 | # how to use 5 | 6 | pytorch to onnx 7 | 8 | https://github.com/thfylsty/FastBEV 9 | 10 | make bev -j8 11 | 12 | 13 | # Reference: 14 | 15 | TensorRT 16 | 17 | https://github.com/shouxieai/tensorRT_Pro -------------------------------------------------------------------------------- /src/application/app_fastbev.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "fastbev.hpp" 6 | #include 7 | 8 | using namespace std; 9 | 10 | 11 | static const char* label_map[] = { 12 | "Pedestrian", "Car","MotorcyleRider", "Crane", "Motorcycle", "Bus", "BicycleRider", "Van", "Excavator", "TricycleRider","Truck" 13 | }; 14 | static void append_to_file(const string& file, const string& data){ 15 | FILE* f = fopen(file.c_str(), "a+"); 16 | if(f == nullptr){ 17 | INFOE("Open %s failed.", file.c_str()); 18 | return; 19 | } 20 | 21 | fprintf(f, "%s\n", data.c_str()); 22 | fclose(f); 23 | } 24 | 25 | std::vector rotate_box(float x1, float y1, float x2, float y2, float r) { 26 | // Step 1: Translate coordinates to top-left corner 27 | float cx = (x1 + x2) / 2.0f; 28 | float cy = (y1 + y2) / 2.0f; 29 | x1 -= cx; 30 | y1 -= cy; 31 | x2 -= cx; 32 | y2 -= cy; 33 | 34 | // Step 2: Convert angle to radians 35 | r = r * M_PI / 180.0f; 36 | 37 | // Step 3: Compute rotation matrix 38 | float cos_r = cos(r); 39 | float sin_r = sin(r); 40 | 41 | // Step 4: Rotate box vertices 42 | float x1_new = cos_r * x1 - sin_r * y1; 43 | float y1_new = sin_r * x1 + cos_r * y1; 44 | float x2_new = cos_r * x2 - sin_r * y2; 45 | float y2_new = sin_r * x2 + 
cos_r * y2; 46 | 47 | // Step 5: Translate coordinates back to original position 48 | x1_new += cx; 49 | y1_new += cy; 50 | x2_new += cx; 51 | y2_new += cy; 52 | 53 | // Step 6: Pack rotated box coordinates into vector and return 54 | std::vector rotated_box = {x1_new, y1_new, x2_new, y2_new}; 55 | return rotated_box; 56 | } 57 | 58 | void forward(shared_ptr &engine, Fastbev::Image &images,cv::Mat &bevimg){ 59 | 60 | auto boxes = engine->commit(images).get(); 61 | // printf("boxes [%d]\n",boxes.size()); 62 | int bevsize_w = 1000; 63 | int bevsize_h = 600; 64 | cv::Mat img(bevsize_h, bevsize_w, CV_8UC3, cv::Scalar(255,255,255)); 65 | for(auto& obj : boxes){ 66 | // printf("class[%s] confidence[%f] label[%d] x[%f] y[%f] z[%f] dx[%f] dy[%f] dz[%f] rot[%f] \n", 67 | // label_map[obj.label],obj.confidence,obj.label,obj.x,obj.y,obj.z,obj.dx,obj.dy,obj.dz,obj.rot); 68 | 69 | // Calculate the four corner points of the rotated rectangle 70 | uint8_t b, g, r; 71 | tie(b, g, r) = iLogger::random_color(obj.label + 1); 72 | 73 | int x = bevsize_w - (obj.y + 50)*10; 74 | int y = bevsize_h - obj.x * 10; 75 | int w = obj.dx * 10; 76 | int h = obj.dy * 10; 77 | int rot = int(90 - obj.rot/3.1415926*180 + 360)%180; 78 | // printf("%d %d %d %d %d \n",x,y,x+w,y+h,r); 79 | cv::RotatedRect box(cv::Point(x, y), cv::Size(w, h), rot); 80 | cv::Point2f vertex[4]; 81 | box.points(vertex); 82 | for (int i = 0; i < 4; i++) 83 | cv::line(img, vertex[i], vertex[(i + 1) % 4], cv::Scalar(b, g, r),10,cv::LINE_AA); 84 | 85 | auto caption = iLogger::format("[%s %.2f]", label_map[obj.label],obj.confidence); 86 | cv::putText(img, caption, (cv::Point(x, y-w-10)), 0, 0.5, cv::Scalar(b, g, r), 1, 16); 87 | 88 | cv::circle(img, cv::Point(bevsize_w/2,bevsize_h), 20, cv::Scalar(0, 0, 0), cv::FILLED); 89 | 90 | } 91 | bevimg = img; 92 | cv::imwrite("result.png", img); 93 | } 94 | 95 | 96 | 97 | static void inference_and_performance(int deviceid, const string& engine_file, TRT::Mode mode, const string& model_name,const string& imgpath){ 98 | 99 | auto engine = Fastbev::create_infer( 100 | engine_file, // engine file 101 | deviceid, // gpu id 102 | 0.9f, // confidence threshold 103 | 0.45f, // nms threshold 104 | Fastbev::NMSMethod::FastGPU, // NMS method, fast GPU / CPU 105 | 1024, // max objects 106 | false // preprocess use multi stream 107 | ); 108 | if(engine == nullptr){ 109 | INFOE("Engine is nullptr"); 110 | return; 111 | } 112 | 113 | 114 | std::vector images_mat; 115 | auto imagef = cv::imread("./images/2022-05-12-11-24-22_000005_front.png"); 116 | auto imagel = cv::imread("./images/2022-05-12-11-24-22_000005_left.png"); 117 | auto imager = cv::imread("./images/2022-05-12-11-24-22_000005_right.png"); 118 | 119 | images_mat.emplace_back(imagef); 120 | images_mat.emplace_back(imagel); 121 | images_mat.emplace_back(imager); 122 | 123 | Fastbev::Image images(images_mat); 124 | auto boxes = engine->commit(images).get(); 125 | for(auto& obj : boxes) 126 | printf("class[%s] confidence[%f] label[%d] x[%f] y[%f] z[%f] dx[%f] dy[%f] dz[%f] rot[%f] \n", 127 | label_map[obj.label],obj.confidence,obj.label,obj.x,obj.y,obj.z,obj.dx,obj.dy,obj.dz,obj.rot); 128 | 129 | printf("input images height %d width %d nums %d \n",images.get_height(),images.get_width(),images.get_nums()); 130 | 131 | 132 | // warmup 133 | for(int i = 0; i < 10; ++i) 134 | auto boxes = engine->commit(images).get(); 135 | 136 | 137 | int test_nums = 20; 138 | auto begin_timer = iLogger::timestamp_now_float(); 139 | for(int i = 0; i < test_nums; ++i) 140 | auto boxes = 
engine->commit(images).get(); 141 | float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / test_nums; 142 | INFO(" ==== average: %.2f ms / iter, FPS: %.2f === ", inference_average_time, 1000 / inference_average_time); 143 | 144 | std::vector allimages; 145 | 146 | for(int idx =0 ;idx < 160 ;idx ++){ 147 | std::ostringstream oss; 148 | oss << std::setw(6) << std::setfill('0') << idx; 149 | 150 | std::string imgfile = "./roadsideimages/2022-05-09-08-47-43_" +oss.str(); //+ "_front.png" 151 | imagef = cv::imread(imgfile+"_front.png"); 152 | imagel = cv::imread(imgfile+"_left.png"); 153 | imager = cv::imread(imgfile+"_right.png"); 154 | std::cout << imgfile+"_front.png" <commits(images); 192 | // for (auto & image :images){ 193 | // boxes_array.emplace_back(engine->commit(image)); 194 | // } 195 | 196 | // // wait all result 197 | // boxes_array.back().get(); 198 | 199 | // float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / ntest / images.get_nums(); 200 | // auto mode_name = TRT::mode_string(mode); 201 | // INFO("%s average: %.2f ms / image, FPS: %.2f", engine_file.c_str(), inference_average_time, 1000 / inference_average_time); 202 | // append_to_file("perf.result.log", iLogger::format("%s,%s,%f", model_name.c_str(), mode_name, inference_average_time)); 203 | 204 | 205 | // string root = iLogger::format("%s_%s_%s_result", imgpath.c_str(),model_name.c_str(), mode_name); 206 | // iLogger::rmtree(root); 207 | // iLogger::mkdir(root); 208 | 209 | // for(int i = 0; i < boxes_array.size(); ++i){ 210 | 211 | // // auto& image = images[i].cvmat; 212 | // auto boxes = boxes_array[i].get(); 213 | 214 | // for(auto& obj : boxes){ 215 | // printf("[%f %f %f][%d %f]\n",obj.x,obj.y,obj.z,obj.label,obj.confidence); 216 | // // uint8_t b, g, r; 217 | // // tie(b, g, r) = iLogger::random_color(obj.class_label); 218 | // // cv::rectangle(image, cv::Point(obj.left, obj.top), cv::Point(obj.right, obj.bottom), cv::Scalar(b, g, r), 2); 219 | 220 | // // auto name = cocolabels[obj.class_label]; 221 | // // auto caption = iLogger::format("%s [%.2f %.2f]", name,obj.confidence,obj.depth); 222 | // // int width = cv::getTextSize(caption, 0, 0.5, 1, nullptr).width + 1; 223 | // // cv::rectangle(image, cv::Point(obj.left-3, obj.top-20), cv::Point(obj.left + width, obj.top), cv::Scalar(b, g, r), -1); 224 | // // cv::putText(image, caption, cv::Point(obj.left, obj.top-5), 0, 0.5, cv::Scalar::all(0), 1, 16); 225 | // } 226 | 227 | // // string file_name = iLogger::file_name(files[i], false); 228 | // // string save_path = iLogger::format("%s/%s.jpg", root.c_str(), file_name.c_str()); 229 | // // INFO("Save to %s, %d object, average time %.2f ms", save_path.c_str(), boxes.size(), inference_average_time); 230 | // // cv::imwrite(save_path, image); 231 | // // cv::imshow("f", image);cv::waitKey(0); 232 | // } 233 | engine.reset(); 234 | } 235 | 236 | static void test(TRT::Mode mode, const string& model,const string& imgpath,const int batch_size){ 237 | 238 | int deviceid = 0; 239 | auto mode_name = TRT::mode_string(mode); 240 | TRT::set_device(deviceid); 241 | 242 | 243 | const char* name = model.c_str(); 244 | INFO("===================== test %s %s ==================================", mode_name, name); 245 | 246 | 247 | string onnx_file = iLogger::format("onnxs/%s.onnx", name); 248 | string model_file = iLogger::format("engines/%s.%s.bs%d.engine", name, mode_name, batch_size); 249 | std::cout << model_file<< std::endl; 250 | int test_batch_size = batch_size; 251 | 252 | 
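    // Compile the TensorRT engine from the ONNX file only when no serialized
    // engine exists yet; later runs reuse the cached .engine file directly.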
if(not iLogger::exists(model_file)){ 253 | TRT::compile( 254 | mode, // FP32、FP16、INT8 255 | test_batch_size, // max batch size 256 | onnx_file, // source 257 | model_file, // save to 258 | {}, 259 | nullptr, 260 | "inference" 261 | ); 262 | } 263 | 264 | inference_and_performance(deviceid, model_file, mode, name,imgpath); 265 | 266 | } 267 | 268 | 269 | int app_fastbev(){ 270 | 271 | // test(TRT::Mode::FP32, "roadside_train_half_res_aug_20230405-2208—epoch_50_20230414-1135","images",1); 272 | test(TRT::Mode::FP16, "roadside_train_half_res_aug_20230405-2208—epoch_50_20230414-1135","images",1); 273 | 274 | return 0; 275 | } -------------------------------------------------------------------------------- /src/application/fastbev.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FASTBEV_HPP 3 | #define FASTBEV_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | namespace Fastbev{ 15 | 16 | using namespace std; 17 | 18 | enum class ImageType : int{ 19 | CVMat = 0, 20 | GPUYUV = 1 // nv12 21 | }; 22 | 23 | struct Image{ 24 | ImageType type = ImageType::CVMat; 25 | cv::Mat cvmat; 26 | std::vector cvmats; 27 | 28 | // GPU YUV image 29 | TRT::CUStream stream = nullptr; 30 | // uint8_t* device_data = nullptr; 31 | int width = 0, height = 0; 32 | int device_id = 0; 33 | 34 | Image() = default; 35 | Image(const std::vector& cvmats):cvmats(cvmats), type(ImageType::CVMat), 36 | width(cvmats[0].cols),height(cvmats[0].cols){} 37 | 38 | int get_nums() const{return cvmats.size(); } 39 | int get_width() const{return cvmats[0].cols;} 40 | int get_height() const{return cvmats[0].rows;} 41 | cv::Size get_size() const{return cv::Size(get_width(), get_height()*3);} 42 | bool empty() const{return cvmats.size()==0 || cvmats[0].empty();} 43 | 44 | 45 | }; 46 | 47 | 48 | struct Box{ 49 | float x,y,z,dx,dy,dz,rot,confidence; 50 | int label; 51 | 52 | Box() = default; 53 | 54 | Box(float x, float y, float z, float dx, float dy, float dz, float rot, float confidence, int label) 55 | :x(x), y(y), z(z), dx(dx),dy(dy),dz(dz),rot(rot), confidence(confidence), label(label){} 56 | }; 57 | 58 | typedef std::vector BoxArray; 59 | 60 | 61 | enum class NMSMethod : int{ 62 | CPU = 0, // General, for estimate mAP 63 | FastGPU = 1 // Fast NMS with a small loss of accuracy in corner cases 64 | }; 65 | 66 | // void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, int ibatch); 67 | 68 | class Infer{ 69 | public: 70 | virtual shared_future commit(const Image& image) = 0; 71 | }; 72 | 73 | shared_ptr create_infer( 74 | const string& engine_file, int gpuid, 75 | float confidence_threshold=0.25f, float nms_threshold=0.5f, 76 | NMSMethod nms_method = NMSMethod::FastGPU, int max_objects = 1024, 77 | bool use_multi_preprocess_stream = false 78 | ); 79 | 80 | 81 | }; // namespace Fastbev 82 | 83 | #endif -------------------------------------------------------------------------------- /src/application/fastbev_decode.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | 5 | namespace Fastbev{ 6 | 7 | const int NUM_BOX_ELEMENT = 10; // fastbev: x y z dx dy dz r conf class keepflag 8 | // static __device__ void affine_project(float* matrix, float x, float y, float* ox, float* oy){ 9 | // *ox = matrix[0] * x + matrix[1] * y + matrix[2]; 10 | // *oy = matrix[3] * x + matrix[4] * y + matrix[5]; 11 | // } 12 | 13 | static __global__ void decode_kernel(float* 
predict, int num_bboxes, int num_classes, float confidence_threshold, float* parray, int max_objects){ 14 | 15 | int position = blockDim.x * blockIdx.x + threadIdx.x; 16 | if (position >= num_bboxes) return; 17 | 18 | float* pitem = predict + 20 * position; // fastbev: 7+2+11 x y z dx dy dz r d classnum 19 | // float objectness = pitem[4]; 20 | // if(objectness < confidence_threshold) 21 | // return; 22 | float* class_confidence = pitem; 23 | float *conf_tmp = class_confidence; 24 | float confidence = *class_confidence++; 25 | int label = 0; 26 | for(int i = 1; i < num_classes; ++i, ++class_confidence){ 27 | if(*class_confidence > confidence){ 28 | confidence = *class_confidence; 29 | label = i; 30 | } 31 | } 32 | // printf("confidence_threshold[%f] %f",confidence_threshold,*pitem); 33 | 34 | // confidence *= objectness; 35 | if(confidence < confidence_threshold) 36 | return; 37 | 38 | int index = atomicAdd(parray, 1); 39 | if(index >= max_objects) 40 | return; 41 | 42 | pitem = pitem+num_classes; 43 | float x = *pitem++; 44 | float y = *pitem++; 45 | float z = *pitem++; 46 | float dx = *pitem++; 47 | float dy = *pitem++; 48 | float dz = *pitem++; 49 | float r = *pitem++; 50 | float dir1 = *pitem++; 51 | float dir2 = *pitem++; 52 | 53 | // printf("label[%d] | confidence[%f] | x[%f] | y[%f] | z[%f] | dx[%f] | dy[%f] | dz[%f] | r[%f] | dir[%f] \n",label,confidence,x,y,z,dx,dy,dz,r, dir); 54 | 55 | float* pout_item = parray + 1 + index * NUM_BOX_ELEMENT; 56 | *pout_item++ = x; 57 | *pout_item++ = y; 58 | *pout_item++ = z; 59 | *pout_item++ = dx; 60 | *pout_item++ = dy; 61 | *pout_item++ = dz; 62 | *pout_item++ = r + (dir1 > dir2 ? 0 : 1)*3.1415926; 63 | *pout_item++ = confidence; 64 | *pout_item++ = label; 65 | *pout_item++ = 1.0; // 1 = keep, 0 = ignore 66 | 67 | } 68 | 69 | static __device__ float box_iou( 70 | float aleft, float atop, float aright, float abottom, 71 | float bleft, float btop, float bright, float bbottom 72 | ){ 73 | 74 | float cleft = max(aleft, bleft); 75 | float ctop = max(atop, btop); 76 | float cright = min(aright, bright); 77 | float cbottom = min(abottom, bbottom); 78 | 79 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); 80 | if(c_area == 0.0f) 81 | return 0.0f; 82 | 83 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); 84 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); 85 | return c_area / (a_area + b_area - c_area); 86 | } 87 | 88 | static __global__ void nms_kernel(float* bboxes, int max_objects, float threshold){ 89 | 90 | int position = (blockDim.x * blockIdx.x + threadIdx.x); 91 | int count = min((int)*bboxes, max_objects); 92 | if (position >= count) 93 | return; 94 | 95 | // x y z dx dy dz r conf class keepflag 96 | float* pcurrent = bboxes + 1 + position * NUM_BOX_ELEMENT; 97 | for(int i = 0; i < count; ++i){ 98 | float* pitem = bboxes + 1 + i * NUM_BOX_ELEMENT; 99 | if(i == position || pcurrent[8] != pitem[8]) continue; 100 | 101 | if(pitem[7] >= pcurrent[7]){ 102 | if(pitem[7] == pcurrent[7] && i < position) 103 | continue; 104 | 105 | float p_x=pcurrent[0]; float p_y=pcurrent[1];float p_dx=pcurrent[3];float p_dy=pcurrent[4]; 106 | float n_x=pitem[0]; float n_y=pitem[1];float n_dx=pitem[3];float n_dy=pitem[4]; 107 | 108 | float iou = box_iou( 109 | p_x,p_y,p_x + p_dx,p_y + p_dy, 110 | n_x,n_y,n_x +n_dx,n_y +n_dy 111 | ); 112 | if(iou > threshold){ 113 | pcurrent[9] = 0; // 1=keep, 0=ignore 114 | return; 115 | } 116 | } 117 | } 118 | } 119 | 120 | void decode_kernel_invoker(float* predict, 
int num_bboxes, int num_classes, float confidence_threshold, float* parray, int max_objects, cudaStream_t stream){ 121 | 122 | auto grid = CUDATools::grid_dims(num_bboxes); 123 | auto block = CUDATools::block_dims(num_bboxes); 124 | 125 | 126 | checkCudaKernel(decode_kernel<<>>(predict, num_bboxes, num_classes, confidence_threshold, parray, max_objects)); 127 | } 128 | 129 | void nms_kernel_invoker(float* parray, float nms_threshold, int max_objects, cudaStream_t stream){ 130 | 131 | auto grid = CUDATools::grid_dims(max_objects); 132 | auto block = CUDATools::block_dims(max_objects); 133 | checkCudaKernel(nms_kernel<<>>(parray, max_objects, nms_threshold)); 134 | } 135 | }; -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int app_fastbev(); 7 | 8 | int main(){ 9 | app_fastbev(); 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /src/tensorRT/builder/trt_builder.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_BUILDER_HPP 4 | #define TRT_BUILDER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace TRT { 12 | 13 | typedef std::function& files, std::shared_ptr& tensor)> Int8Process; 14 | typedef std::function(const std::string& name, const std::vector& shape)> LayerHookFuncReshape; 15 | 16 | enum class ModelSourceType : int{ 17 | OnnX, 18 | OnnXData 19 | }; 20 | 21 | class ModelSource { 22 | public: 23 | ModelSource() = default; 24 | ModelSource(const std::string& onnxmodel); 25 | ModelSource(const char* onnxmodel); 26 | ModelSourceType type() const; 27 | std::string onnxmodel() const; 28 | std::string descript() const; 29 | const void* onnx_data() const; 30 | size_t onnx_data_size() const; 31 | 32 | static ModelSource onnx(const std::string& file){ 33 | ModelSource output; 34 | output.onnxmodel_ = file; 35 | output.type_ = ModelSourceType::OnnX; 36 | return output; 37 | } 38 | 39 | static ModelSource onnx_data(const void* ptr, size_t size){ 40 | ModelSource output; 41 | output.onnx_data_ = ptr; 42 | output.onnx_data_size_ = size; 43 | output.type_ = ModelSourceType::OnnXData; 44 | return output; 45 | } 46 | 47 | private: 48 | std::string onnxmodel_; 49 | const void* onnx_data_ = nullptr; 50 | size_t onnx_data_size_ = 0; 51 | ModelSourceType type_; 52 | }; 53 | 54 | enum class CompileOutputType : int{ 55 | File, 56 | Memory 57 | }; 58 | 59 | class CompileOutput{ 60 | public: 61 | CompileOutput(CompileOutputType type = CompileOutputType::Memory); 62 | CompileOutput(const std::string& file); 63 | CompileOutput(const char* file); 64 | void set_data(const std::vector& data); 65 | void set_data(std::vector&& data); 66 | 67 | const std::vector& data() const{return data_;}; 68 | CompileOutputType type() const{return type_;} 69 | std::string file() const{return file_;} 70 | 71 | private: 72 | CompileOutputType type_ = CompileOutputType::Memory; 73 | std::vector data_; 74 | std::string file_; 75 | }; 76 | 77 | class InputDims { 78 | public: 79 | InputDims() = default; 80 | 81 | // 当为-1时,保留导入时的网络结构尺寸 82 | InputDims(const std::initializer_list& dims); 83 | InputDims(const std::vector& dims); 84 | 85 | const std::vector& dims() const; 86 | 87 | private: 88 | std::vector dims_; 89 | }; 90 | 91 | enum class Mode : int { 92 | FP32, 93 | FP16, 94 | INT8 95 | }; 96 | 97 | const 
char* mode_string(Mode type); 98 | 99 | void set_layer_hook_reshape(const LayerHookFuncReshape& func); 100 | 101 | /** 当处于INT8模式时,int8process必须制定 102 | int8ImageDirectory和int8EntropyCalibratorFile指定一个即可 103 | 如果初次生成,指定了int8EntropyCalibratorFile,calibrator会保存到int8EntropyCalibratorFile指定的文件 104 | 如果已经生成过,指定了int8EntropyCalibratorFile,calibrator会从int8EntropyCalibratorFile指定的文件加载,而不是 105 | 从int8ImageDirectory读取图片再重新生成 106 | 当处于FP32或者FP16时,int8process、int8ImageDirectory、int8EntropyCalibratorFile都不需要指定 107 | 对于嵌入式设备,请把maxWorkspaceSize设置小一点,比如128MB = 1ul << 27 108 | **/ 109 | bool compile( 110 | Mode mode, 111 | unsigned int maxBatchSize, 112 | const ModelSource& source, 113 | const CompileOutput& saveto, 114 | const std::vector inputsDimsSetup = {}, 115 | Int8Process int8process = nullptr, 116 | const std::string& int8ImageDirectory = "", 117 | const std::string& int8EntropyCalibratorFile = "", 118 | const size_t maxWorkspaceSize = 2ul << 30 // 1ul << 30 = 1GB 119 | ); 120 | }; 121 | 122 | #endif //TRT_BUILDER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/cuda_tools.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * 系统关于CUDA的功能函数 4 | */ 5 | 6 | 7 | #include "cuda_tools.hpp" 8 | 9 | namespace CUDATools{ 10 | bool check_driver(CUresult e, const char* call, int line, const char *file) { 11 | if (e != CUDA_SUCCESS) { 12 | 13 | const char* message = nullptr; 14 | const char* name = nullptr; 15 | cuGetErrorString(e, &message); 16 | cuGetErrorName(e, &name); 17 | INFOE("CUDA Driver error %s # %s, code = %s [ %d ] in file %s:%d", call, message, name, e, file, line); 18 | return false; 19 | } 20 | return true; 21 | } 22 | 23 | bool check_runtime(cudaError_t e, const char* call, int line, const char *file){ 24 | if (e != cudaSuccess) { 25 | INFOE("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line); 26 | return false; 27 | } 28 | return true; 29 | } 30 | 31 | bool check_device_id(int device_id){ 32 | int device_count = -1; 33 | checkCudaRuntime(cudaGetDeviceCount(&device_count)); 34 | if(device_id < 0 || device_id >= device_count){ 35 | INFOE("Invalid device id: %d, count = %d", device_id, device_count); 36 | return false; 37 | } 38 | return true; 39 | } 40 | 41 | int current_device_id(){ 42 | int device_id = 0; 43 | checkCudaRuntime(cudaGetDevice(&device_id)); 44 | return device_id; 45 | } 46 | 47 | dim3 grid_dims(int numJobs) { 48 | int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS; 49 | return dim3(((numJobs + numBlockThreads - 1) / (float)numBlockThreads)); 50 | } 51 | 52 | dim3 block_dims(int numJobs) { 53 | return numJobs < GPU_BLOCK_THREADS ? 
numJobs : GPU_BLOCK_THREADS; 54 | } 55 | 56 | std::string device_capability(int device_id){ 57 | cudaDeviceProp prop; 58 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 59 | return iLogger::format("%d.%d", prop.major, prop.minor); 60 | } 61 | 62 | std::string device_name(int device_id){ 63 | cudaDeviceProp prop; 64 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 65 | return prop.name; 66 | } 67 | 68 | std::string device_description(){ 69 | 70 | cudaDeviceProp prop; 71 | size_t free_mem, total_mem; 72 | int device_id = 0; 73 | 74 | checkCudaRuntime(cudaGetDevice(&device_id)); 75 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 76 | checkCudaRuntime(cudaMemGetInfo(&free_mem, &total_mem)); 77 | 78 | return iLogger::format( 79 | "[ID %d]<%s>[arch %d.%d][GMEM %.2f GB/%.2f GB]", 80 | device_id, prop.name, prop.major, prop.minor, 81 | free_mem / 1024.0f / 1024.0f / 1024.0f, 82 | total_mem / 1024.0f / 1024.0f / 1024.0f 83 | ); 84 | } 85 | 86 | AutoDevice::AutoDevice(int device_id){ 87 | 88 | cudaGetDevice(&old_); 89 | checkCudaRuntime(cudaSetDevice(device_id)); 90 | } 91 | 92 | AutoDevice::~AutoDevice(){ 93 | checkCudaRuntime(cudaSetDevice(old_)); 94 | } 95 | } -------------------------------------------------------------------------------- /src/tensorRT/common/cuda_tools.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_TOOLS_HPP 2 | #define CUDA_TOOLS_HPP 3 | 4 | /* 5 | * 系统关于CUDA的功能函数 6 | */ 7 | 8 | #include 9 | #include 10 | #include "ilogger.hpp" 11 | 12 | #define GPU_BLOCK_THREADS 512 13 | 14 | 15 | #define KernelPositionBlock \ 16 | int position = (blockDim.x * blockIdx.x + threadIdx.x); \ 17 | if (position >= (edge)) return; 18 | 19 | 20 | #define checkCudaDriver(call) CUDATools::check_driver(call, #call, __LINE__, __FILE__) 21 | #define checkCudaRuntime(call) CUDATools::check_runtime(call, #call, __LINE__, __FILE__) 22 | 23 | #define checkCudaKernel(...) \ 24 | __VA_ARGS__; \ 25 | do{cudaError_t cudaStatus = cudaPeekAtLastError(); \ 26 | if (cudaStatus != cudaSuccess){ \ 27 | INFOE("launch failed: %s", cudaGetErrorString(cudaStatus)); \ 28 | }} while(0); 29 | 30 | 31 | #define Assert(op) \ 32 | do{ \ 33 | bool cond = !(!(op)); \ 34 | if(!cond){ \ 35 | INFOF("Assert failed, " #op); \ 36 | } \ 37 | }while(false) 38 | 39 | 40 | struct CUctx_st; 41 | struct CUstream_st; 42 | 43 | typedef CUstream_st* ICUStream; 44 | typedef CUctx_st* ICUContext; 45 | typedef void* ICUDeviceptr; 46 | typedef int DeviceID; 47 | 48 | namespace CUDATools{ 49 | bool check_driver(CUresult e, const char* call, int iLine, const char *szFile); 50 | bool check_runtime(cudaError_t e, const char* call, int iLine, const char *szFile); 51 | bool check_device_id(int device_id); 52 | int current_device_id(); 53 | 54 | dim3 grid_dims(int numJobs); 55 | dim3 block_dims(int numJobs); 56 | 57 | // return 8.6 etc. 
58 | std::string device_capability(int device_id); 59 | std::string device_name(int device_id); 60 | std::string device_description(); 61 | 62 | class AutoDevice{ 63 | public: 64 | AutoDevice(int device_id = 0); 65 | virtual ~AutoDevice(); 66 | 67 | private: 68 | int old_ = -1; 69 | }; 70 | } 71 | 72 | 73 | #endif // CUDA_TOOLS_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/ilogger.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef ILOGGER_HPP 3 | #define ILOGGER_HPP 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | #if defined(_WIN32) 13 | # define U_OS_WINDOWS 14 | #else 15 | # define U_OS_LINUX 16 | #endif 17 | 18 | 19 | namespace iLogger{ 20 | 21 | using namespace std; 22 | 23 | enum class LogLevel : int{ 24 | Debug = 5, 25 | Verbose = 4, 26 | Info = 3, 27 | Warning = 2, 28 | Error = 1, 29 | Fatal = 0 30 | }; 31 | 32 | #define INFOD(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Debug, __VA_ARGS__) 33 | #define INFOV(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Verbose, __VA_ARGS__) 34 | #define INFO(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Info, __VA_ARGS__) 35 | #define INFOW(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Warning, __VA_ARGS__) 36 | #define INFOE(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Error, __VA_ARGS__) 37 | #define INFOF(...) iLogger::__log_func(__FILE__, __LINE__, iLogger::LogLevel::Fatal, __VA_ARGS__) 38 | 39 | string date_now(); 40 | string time_now(); 41 | string gmtime_now(); 42 | string gmtime(time_t t); 43 | time_t gmtime2ctime(const string& gmt); 44 | void sleep(int ms); 45 | 46 | bool isfile(const string& file); 47 | bool mkdir(const string& path); 48 | bool mkdirs(const string& path); 49 | bool delete_file(const string& path); 50 | bool rmtree(const string& directory, bool ignore_fail=false); 51 | bool exists(const string& path); 52 | string format(const char* fmt, ...); 53 | FILE* fopen_mkdirs(const string& path, const string& mode); 54 | string file_name(const string& path, bool include_suffix=true); 55 | string directory(const string& path); 56 | long long timestamp_now(); 57 | double timestamp_now_float(); 58 | time_t last_modify(const string& file); 59 | vector load_file(const string& file); 60 | string load_text_file(const string& file); 61 | size_t file_size(const string& file); 62 | 63 | bool begin_with(const string& str, const string& with); 64 | bool end_with(const string& str, const string& with); 65 | vector split_string(const string& str, const std::string& spstr); 66 | string replace_string(const string& str, const string& token, const string& value, int nreplace=-1, int* out_num_replace=nullptr); 67 | 68 | // h[0-1], s[0-1], v[0-1] 69 | // return, 0-255, 0-255, 0-255 70 | tuple hsv2rgb(float h, float s, float v); 71 | tuple random_color(int id); 72 | 73 | // abcdefg.pnga *.png > false 74 | // abcdefg.png *.png > true 75 | // abcdefg.png a?cdefg.png > true 76 | bool pattern_match(const char* str, const char* matcher, bool igrnoe_case = true); 77 | vector find_files( 78 | const string& directory, 79 | const string& filter = "*", bool findDirectory = false, bool includeSubDirectory = false); 80 | 81 | string align_blank(const string& input, int align_size, char blank=' '); 82 | bool save_file(const string& file, const vector& data, bool mk_dirs = true); 83 | bool save_file(const string& file, const string& data, 
bool mk_dirs = true); 84 | bool save_file(const string& file, const void* data, size_t length, bool mk_dirs = true); 85 | 86 | // 捕获:SIGINT(2)、SIGQUIT(3) 87 | int while_loop(); 88 | 89 | // 关于logger的api 90 | const char* level_string(LogLevel level); 91 | void set_logger_save_directory(const string& loggerDirectory); 92 | 93 | void set_log_level(LogLevel level); 94 | LogLevel get_log_level(); 95 | void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...); 96 | void destroy_logger(); 97 | 98 | string base64_decode(const string& base64); 99 | string base64_encode(const void* data, size_t size); 100 | 101 | inline int upbound(int n, int align = 32){return (n + align - 1) / align * align;} 102 | string join_dims(const vector& dims); 103 | }; 104 | 105 | 106 | #endif // ILOGGER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/infer_controller.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INFER_CONTROLLER_HPP 2 | #define INFER_CONTROLLER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "monopoly_allocator.hpp" 13 | 14 | template, class JobAdditional=int> 15 | class InferController{ 16 | public: 17 | struct Job{ 18 | Input input; 19 | Output output; 20 | JobAdditional additional; 21 | MonopolyAllocator::MonopolyDataPointer mono_tensor; 22 | std::shared_ptr> pro; 23 | }; 24 | 25 | virtual ~InferController(){ 26 | stop(); 27 | } 28 | 29 | void stop(){ 30 | run_ = false; 31 | cond_.notify_all(); 32 | 33 | ////////////////////////////////////////// cleanup jobs 34 | { 35 | std::unique_lock l(jobs_lock_); 36 | while(!jobs_.empty()){ 37 | auto& item = jobs_.front(); 38 | if(item.pro) 39 | item.pro->set_value(Output()); 40 | jobs_.pop(); 41 | } 42 | }; 43 | 44 | if(worker_){ 45 | worker_->join(); 46 | worker_.reset(); 47 | } 48 | } 49 | 50 | bool startup(const StartParam& param){ 51 | run_ = true; 52 | 53 | std::promise pro; 54 | start_param_ = param; 55 | worker_ = std::make_shared(&InferController::worker, this, std::ref(pro)); 56 | return pro.get_future().get(); 57 | } 58 | 59 | virtual std::shared_future commit(const Input& input){ 60 | 61 | Job job; 62 | job.pro = std::make_shared>(); 63 | if(!preprocess(job, input)){ 64 | job.pro->set_value(Output()); 65 | return job.pro->get_future(); 66 | } 67 | 68 | /////////////////////////////////////////////////////////// 69 | { 70 | std::unique_lock l(jobs_lock_); 71 | jobs_.push(job); 72 | }; 73 | cond_.notify_one(); 74 | return job.pro->get_future(); 75 | } 76 | 77 | virtual std::vector> commits(const std::vector& inputs){ 78 | 79 | int batch_size = std::min((int)inputs.size(), this->tensor_allocator_->capacity()); 80 | std::vector jobs(inputs.size()); 81 | std::vector> results(inputs.size()); 82 | 83 | int nepoch = (inputs.size() + batch_size - 1) / batch_size; 84 | for(int epoch = 0; epoch < nepoch; ++epoch){ 85 | int begin = epoch * batch_size; 86 | int end = std::min((int)inputs.size(), begin + batch_size); 87 | 88 | for(int i = begin; i < end; ++i){ 89 | Job& job = jobs[i]; 90 | job.pro = std::make_shared>(); 91 | if(!preprocess(job, inputs[i])){ 92 | job.pro->set_value(Output()); 93 | } 94 | results[i] = job.pro->get_future(); 95 | } 96 | 97 | /////////////////////////////////////////////////////////// 98 | { 99 | std::unique_lock l(jobs_lock_); 100 | for(int i = begin; i < end; ++i){ 101 | jobs_.emplace(std::move(jobs[i])); 102 | }; 103 | } 
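            // Wake the worker thread so it can consume the batch of jobs that was just queued under the lock.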
104 | cond_.notify_one(); 105 | } 106 | return results; 107 | } 108 | 109 | protected: 110 | virtual void worker(std::promise& result) = 0; 111 | virtual bool preprocess(Job& job, const Input& input) = 0; 112 | 113 | virtual bool get_jobs_and_wait(std::vector& fetch_jobs, int max_size){ 114 | 115 | std::unique_lock l(jobs_lock_); 116 | cond_.wait(l, [&](){ 117 | return !run_ || !jobs_.empty(); 118 | }); 119 | 120 | if(!run_) return false; 121 | 122 | fetch_jobs.clear(); 123 | for(int i = 0; i < max_size && !jobs_.empty(); ++i){ 124 | fetch_jobs.emplace_back(std::move(jobs_.front())); 125 | jobs_.pop(); 126 | } 127 | return true; 128 | } 129 | 130 | virtual bool get_job_and_wait(Job& fetch_job){ 131 | 132 | std::unique_lock l(jobs_lock_); 133 | cond_.wait(l, [&](){ 134 | return !run_ || !jobs_.empty(); 135 | }); 136 | 137 | if(!run_) return false; 138 | 139 | fetch_job = std::move(jobs_.front()); 140 | jobs_.pop(); 141 | return true; 142 | } 143 | 144 | protected: 145 | StartParam start_param_; 146 | std::atomic run_; 147 | std::mutex jobs_lock_; 148 | std::queue jobs_; 149 | std::shared_ptr worker_; 150 | std::condition_variable cond_; 151 | std::shared_ptr> tensor_allocator_; 152 | }; 153 | 154 | #endif // INFER_CONTROLLER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/monopoly_allocator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * 独占分配器 3 | * 用以解决以下问题: 4 | * 1. 实现tensor复用的问题 5 | * 2. 对于tensor使用的两个阶段实现并行,时间重叠 6 | * 阶段一:预处理准备 7 | * 阶段二:模型推理 8 | * 9 | * 设计思路: 10 | * 以海底捞吃火锅为类比,座位分为两种:堂内吃饭的座位、厅外等候的座位 11 | * 12 | * 1. 初始状态,堂内有10个座位,厅外有10个座位,全部空 13 | * 2. 来了30个人吃火锅 14 | * 3. 流程是,先安排10个人坐在厅外修整,20人个人排队等候 15 | * 4. 由于堂内没人,所以调度坐在厅外的10个人进入堂内,开始吃火锅。厅外的10个座位为空 16 | * 5. 由于厅外没人,所以可以让排队的20人中,取10个人在厅外修整 17 | * 6. 此时状态为,堂内10人,厅外10人,等候10人 18 | * 7. 
经过60分钟后,堂内10人吃完,紧接着执行步骤4 19 | * 20 | * 在实际工作中,通常图像输入过程有预处理、推理 21 | * 我们的目的是让预处理和推理时间进行重叠。因此设计了一个缓冲区,类似厅外等候区的那种形式 22 | * 当我们输入图像时,具有2倍batch的空间进行预处理用于缓存 23 | * 而引擎推理时,每次拿1个batch的数据进行推理 24 | * 当引擎推理速度慢而预处理速度快时,输入图像势必需要进行等候。否则缓存队列会越来越大 25 | * 而这里提到的几个点就是设计的主要目标 26 | **/ 27 | 28 | #ifndef MONOPOLY_ALLOCATOR_HPP 29 | #define MONOPOLY_ALLOCATOR_HPP 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | template 37 | class MonopolyAllocator{ 38 | public: 39 | /* Data是数据容器类 40 | 允许query获取的item执行item->release释放自身所有权,该对象可以被复用 41 | 通过item->data()获取储存的对象的指针 42 | */ 43 | class MonopolyData{ 44 | public: 45 | std::shared_ptr<_ItemType>& data(){ return data_; } 46 | void release(){manager_->release_one(this);} 47 | 48 | private: 49 | MonopolyData(MonopolyAllocator* pmanager){manager_ = pmanager;} 50 | 51 | private: 52 | friend class MonopolyAllocator; 53 | MonopolyAllocator* manager_ = nullptr; 54 | std::shared_ptr<_ItemType> data_; 55 | bool available_ = true; 56 | }; 57 | typedef std::shared_ptr MonopolyDataPointer; 58 | 59 | MonopolyAllocator(int size){ 60 | capacity_ = size; 61 | num_available_ = size; 62 | datas_.resize(size); 63 | 64 | for(int i = 0; i < size; ++i) 65 | datas_[i] = std::shared_ptr(new MonopolyData(this)); 66 | } 67 | 68 | virtual ~MonopolyAllocator(){ 69 | run_ = false; 70 | cv_.notify_all(); 71 | 72 | std::unique_lock l(lock_); 73 | cv_exit_.wait(l, [&](){ 74 | return num_wait_thread_ == 0; 75 | }); 76 | } 77 | 78 | /* 获取一个可用的对象 79 | timeout:超时时间,如果没有可用的对象,将会进入阻塞等待,如果等待超时则返回空指针 80 | 请求得到一个对象后,该对象被占用,除非他执行了release释放该对象所有权 81 | */ 82 | MonopolyDataPointer query(int timeout = 10000){ 83 | 84 | std::unique_lock l(lock_); 85 | if(!run_) return nullptr; 86 | 87 | if(num_available_ == 0){ 88 | num_wait_thread_++; 89 | 90 | auto state = cv_.wait_for(l, std::chrono::milliseconds(timeout), [&](){ 91 | return num_available_ > 0 || !run_; 92 | }); 93 | 94 | num_wait_thread_--; 95 | cv_exit_.notify_one(); 96 | 97 | // timeout, no available, exit program 98 | if(!state || num_available_ == 0 || !run_) 99 | return nullptr; 100 | } 101 | 102 | auto item = std::find_if(datas_.begin(), datas_.end(), [](MonopolyDataPointer& item){return item->available_;}); 103 | if(item == datas_.end()) 104 | return nullptr; 105 | 106 | (*item)->available_ = false; 107 | num_available_--; 108 | return *item; 109 | } 110 | 111 | int num_available(){ 112 | return num_available_; 113 | } 114 | 115 | int capacity(){ 116 | return capacity_; 117 | } 118 | 119 | private: 120 | void release_one(MonopolyData* prq){ 121 | std::unique_lock l(lock_); 122 | if(!prq->available_){ 123 | prq->available_ = true; 124 | num_available_++; 125 | cv_.notify_one(); 126 | } 127 | } 128 | 129 | private: 130 | std::mutex lock_; 131 | std::condition_variable cv_; 132 | std::condition_variable cv_exit_; 133 | std::vector datas_; 134 | int capacity_ = 0; 135 | volatile int num_available_ = 0; 136 | volatile int num_wait_thread_ = 0; 137 | volatile bool run_ = true; 138 | }; 139 | 140 | #endif // MONOPOLY_ALLOCATOR_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/preprocess_kernel.cuh: -------------------------------------------------------------------------------- 1 | #ifndef PREPROCESS_KERNEL_CUH 2 | #define PREPROCESS_KERNEL_CUH 3 | 4 | #include "cuda_tools.hpp" 5 | 6 | namespace CUDAKernel{ 7 | 8 | enum class NormType : int{ 9 | None = 0, 10 | MeanStd = 1, 11 | AlphaBeta = 2 12 | }; 13 | 14 | enum class ChannelType : int{ 15 | None = 0, 16 | Invert = 1 17 | }; 18 | 19 | 
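    // Usage sketch (illustrative only, not the actual FastBEV configuration): how a Norm is
    // typically built and handed to resize_bilinear_and_normalize() declared further below.
    // The mean/std values and the src_device/dst_device/stream variables are placeholders.
    //
    //   float mean[3] = {0.485f, 0.456f, 0.406f};
    //   float std [3] = {0.229f, 0.224f, 0.225f};
    //   auto norm = Norm::mean_std(mean, std, 1/255.0f, ChannelType::Invert); // channel invert (e.g. BGR -> RGB), out = (x/255 - mean)/std
    //   resize_bilinear_and_normalize(
    //       src_device, src_width * 3, src_width, src_height,   // src_line_size = width * 3 channels
    //       dst_device, dst_width, dst_height,
    //       norm, stream);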
struct Norm{ 20 | float mean[3]; 21 | float std[3]; 22 | float alpha, beta; 23 | NormType type = NormType::None; 24 | ChannelType channel_type = ChannelType::None; 25 | 26 | // out = (x * alpha - mean) / std 27 | static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None); 28 | 29 | // out = x * alpha + beta 30 | static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None); 31 | 32 | // None 33 | static Norm None(); 34 | }; 35 | 36 | void resize_bilinear_and_normalize( 37 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 38 | const Norm& norm, 39 | cudaStream_t stream); 40 | 41 | void warp_affine_bilinear_and_normalize_plane( 42 | uint8_t* src, int src_line_size, int src_width, int src_height, 43 | float* dst , int dst_width, int dst_height, 44 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 45 | cudaStream_t stream); 46 | 47 | void warp_affine_bilinear_and_normalize_focus( 48 | uint8_t* src, int src_line_size, int src_width, int src_height, 49 | float* dst , int dst_width, int dst_height, 50 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 51 | cudaStream_t stream); 52 | 53 | // 可以用来图像校正、图像旋转等等 (测试比cpu快10倍以上) 54 | // 使用示范: 55 | // float* matrix_3_3 = nullptr; 56 | // size_t matrix_bytes = 3 * 3 * sizeof(f32); 57 | // checkCudaRuntime(cudaMalloc(&matrix_3_3, matrix_bytes)); 58 | // checkCudaRuntime(cudaMemset(matrix_3_3, 0, matrix_bytes)); 59 | // 60 | // #左上、右上、右下、左下 原图像四个点的坐标 61 | // cv::Point2f src_points[] = { 62 | // vctvctPoints[nImageIdx][0], 63 | // vctvctPoints[nImageIdx][1], 64 | // vctvctPoints[nImageIdx][2], 65 | // vctvctPoints[nImageIdx][3]}; 66 | // 67 | // #左上、右上、左下、右下(Z 字形排列) 目标图像四个点的坐标 68 | // cv::Point2f dst_points[] = { 69 | // cv::Point2f(0, 0), 70 | // cv::Point2f(nw-1, 0), 71 | // cv::Point2f(0, nh-1), 72 | // cv::Point2f(nw-1, nh-1) }; 73 | // 利用opencv 得到变换矩阵 dst -> src 的 矩阵 74 | // cv::Mat Perspect_Matrix = cv::getPerspectiveTransform(dst_points, src_points); 75 | // Perspect_Matrix.convertTo(Perspect_Matrix, CV_32FC1); 76 | // 拷贝到 gpu 77 | // checkCudaRuntime(cudaMemcpy(matrix_3_3, Perspect_Matrix.data, matrix_bytes, cudaMemcpyHostToDevice)); 78 | void warp_perspective( 79 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 80 | float* matrix_3_3, uint8_t const_value, const Norm& norm, cudaStream_t stream 81 | ); 82 | 83 | void norm_feature( 84 | float* feature_array, int num_feature, int feature_length, 85 | cudaStream_t stream 86 | ); 87 | 88 | // void convert_nv12_to_bgr_invoke( 89 | // const uint8_t* y, const uint8_t* uv, int width, int height, 90 | // int linesize, uint8_t* dst, 91 | // cudaStream_t stream); 92 | 93 | // void convert_yuv422_to_bgr_invoke( 94 | // const uint8_t* y, const uint8_t* uv, int width, int height, 95 | // int linesize, uint8_t* dst, 96 | // cudaStream_t stream); 97 | 98 | void gpuConvertYUYVtoRGB( 99 | const uint8_t* src, uint8_t* dst,int width, int height, cudaStream_t stream); 100 | }; 101 | 102 | #endif // PREPROCESS_KERNEL_CUH -------------------------------------------------------------------------------- /src/tensorRT/common/trt_tensor.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TRT_TENSOR_HPP 3 | #define TRT_TENSOR_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | struct CUstream_st; 12 | typedef 
CUstream_st CUStreamRaw; 13 | 14 | #define CURRENT_DEVICE_ID -1 15 | 16 | namespace TRT { 17 | 18 | typedef struct{unsigned short _;} float16; 19 | typedef CUStreamRaw* CUStream; 20 | 21 | enum class DataHead : int{ 22 | Init = 0, 23 | Device = 1, 24 | Host = 2 25 | }; 26 | 27 | enum class DataType : int { 28 | Unknow = -1, 29 | Float = 0, 30 | Float16 = 1, 31 | Int32 = 2, 32 | UInt8 = 3 33 | }; 34 | 35 | float float16_to_float(float16 value); 36 | float16 float_to_float16(float value); 37 | int data_type_size(DataType dt); 38 | const char* data_head_string(DataHead dh); 39 | const char* data_type_string(DataType dt); 40 | 41 | class MixMemory { 42 | public: 43 | MixMemory(int device_id = CURRENT_DEVICE_ID); 44 | MixMemory(void* cpu, size_t cpu_size, void* gpu, size_t gpu_size); 45 | virtual ~MixMemory(); 46 | void* gpu(size_t size); 47 | void* cpu(size_t size); 48 | void release_gpu(); 49 | void release_cpu(); 50 | void release_all(); 51 | 52 | inline bool owner_gpu() const{return owner_gpu_;} 53 | inline bool owner_cpu() const{return owner_cpu_;} 54 | 55 | inline size_t cpu_size() const{return cpu_size_;} 56 | inline size_t gpu_size() const{return gpu_size_;} 57 | inline int device_id() const{return device_id_;} 58 | 59 | inline void* gpu() const { return gpu_; } 60 | 61 | // Pinned Memory 62 | inline void* cpu() const { return cpu_; } 63 | 64 | void reference_data(void* cpu, size_t cpu_size, void* gpu, size_t gpu_size); 65 | 66 | private: 67 | void* cpu_ = nullptr; 68 | size_t cpu_size_ = 0; 69 | bool owner_cpu_ = true; 70 | int device_id_ = 0; 71 | 72 | void* gpu_ = nullptr; 73 | size_t gpu_size_ = 0; 74 | bool owner_gpu_ = true; 75 | }; 76 | 77 | class Tensor { 78 | public: 79 | Tensor(const Tensor& other) = delete; 80 | Tensor& operator = (const Tensor& other) = delete; 81 | 82 | explicit Tensor(DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 83 | explicit Tensor(int n, int c, int h, int w, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 84 | explicit Tensor(int ndims, const int* dims, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 85 | explicit Tensor(const std::vector& dims, DataType dtype = DataType::Float, std::shared_ptr data = nullptr, int device_id = CURRENT_DEVICE_ID); 86 | virtual ~Tensor(); 87 | 88 | int numel() const; 89 | inline int ndims() const{return shape_.size();} 90 | inline int size(int index) const{return shape_[index];} 91 | inline int shape(int index) const{return shape_[index];} 92 | 93 | inline int batch() const{return shape_[0];} 94 | inline int channel() const{return shape_[1];} 95 | inline int height() const{return shape_[2];} 96 | inline int width() const{return shape_[3];} 97 | 98 | inline DataType type() const { return dtype_; } 99 | inline const std::vector& dims() const { return shape_; } 100 | inline const std::vector& strides() const {return strides_;} 101 | inline int bytes() const { return bytes_; } 102 | inline int bytes(int start_axis) const { return count(start_axis) * element_size(); } 103 | inline int element_size() const { return data_type_size(dtype_); } 104 | inline DataHead head() const { return head_; } 105 | 106 | std::shared_ptr clone() const; 107 | Tensor& release(); 108 | Tensor& set_to(float value); 109 | bool empty() const; 110 | 111 | template 112 | int offset(int index, _Args ... 
index_args) const{ 113 | const int index_array[] = {index, index_args...}; 114 | return offset_array(sizeof...(index_args) + 1, index_array); 115 | } 116 | 117 | int offset_array(const std::vector& index) const; 118 | int offset_array(size_t size, const int* index_array) const; 119 | 120 | template 121 | Tensor& resize(int dim_size, _Args ... dim_size_args){ 122 | const int dim_size_array[] = {dim_size, dim_size_args...}; 123 | return resize(sizeof...(dim_size_args) + 1, dim_size_array); 124 | } 125 | 126 | Tensor& resize(int ndims, const int* dims); 127 | Tensor& resize(const std::vector& dims); 128 | Tensor& resize_single_dim(int idim, int size); 129 | int count(int start_axis = 0) const; 130 | int device() const{return device_id_;} 131 | 132 | Tensor& to_gpu(bool copy=true); 133 | Tensor& to_cpu(bool copy=true); 134 | 135 | Tensor& to_half(); 136 | Tensor& to_float(); 137 | inline void* cpu() const { ((Tensor*)this)->to_cpu(); return data_->cpu(); } 138 | inline void* gpu() const { ((Tensor*)this)->to_gpu(); return data_->gpu(); } 139 | 140 | template inline const DType* cpu() const { return (DType*)cpu(); } 141 | template inline DType* cpu() { return (DType*)cpu(); } 142 | 143 | template 144 | inline DType* cpu(int i, _Args&& ... args) { return cpu() + offset(i, args...); } 145 | 146 | 147 | template inline const DType* gpu() const { return (DType*)gpu(); } 148 | template inline DType* gpu() { return (DType*)gpu(); } 149 | 150 | template 151 | inline DType* gpu(int i, _Args&& ... args) { return gpu() + offset(i, args...); } 152 | 153 | 154 | template 155 | inline DType& at(int i, _Args&& ... args) { return *(cpu() + offset(i, args...)); } 156 | 157 | std::shared_ptr get_data() const {return data_;} 158 | std::shared_ptr get_workspace() const {return workspace_;} 159 | Tensor& set_workspace(std::shared_ptr workspace) {workspace_ = workspace; return *this;} 160 | 161 | bool is_stream_owner() const {return stream_owner_;} 162 | CUStream get_stream() const{return stream_;} 163 | Tensor& set_stream(CUStream stream, bool owner=false){stream_ = stream; stream_owner_ = owner; return *this;} 164 | 165 | Tensor& set_mat (int n, const cv::Mat& image); 166 | Tensor& set_norm_mat(int n, const cv::Mat& image, float mean[3], float std[3]); 167 | cv::Mat at_mat(int n = 0, int c = 0) { return cv::Mat(height(), width(), CV_32F, cpu(n, c)); } 168 | 169 | Tensor& synchronize(); 170 | const char* shape_string() const{return shape_string_;} 171 | const char* descriptor() const; 172 | 173 | Tensor& copy_from_gpu(size_t offset, const void* src, size_t num_element, int device_id = CURRENT_DEVICE_ID); 174 | Tensor& copy_from_cpu(size_t offset, const void* src, size_t num_element); 175 | 176 | void reference_data(const std::vector& shape, void* cpu_data, size_t cpu_size, void* gpu_data, size_t gpu_size, DataType dtype); 177 | 178 | /** 179 | 180 | # 以下代码是python中加载Tensor 181 | import numpy as np 182 | 183 | def load_tensor(file): 184 | 185 | with open(file, "rb") as f: 186 | binary_data = f.read() 187 | 188 | magic_number, ndims, dtype = np.frombuffer(binary_data, np.uint32, count=3, offset=0) 189 | assert magic_number == 0xFCCFE2E2, f"{file} not a tensor file." 
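            # the header is three uint32 words (magic, ndims, dtype); the dims follow immediately after it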
190 | 191 | dims = np.frombuffer(binary_data, np.uint32, count=ndims, offset=3 * 4) 192 | 193 | if dtype == 0: 194 | np_dtype = np.float32 195 | elif dtype == 1: 196 | np_dtype = np.float16 197 | else: 198 | assert False, f"Unsupport dtype = {dtype}, can not convert to numpy dtype" 199 | 200 | return np.frombuffer(binary_data, np_dtype, offset=(ndims + 3) * 4).reshape(*dims) 201 | 202 | **/ 203 | bool save_to_file(const std::string& file) const; 204 | bool load_from_file(const std::string& file); 205 | 206 | private: 207 | Tensor& compute_shape_string(); 208 | Tensor& adajust_memory_by_update_dims_or_type(); 209 | void setup_data(std::shared_ptr data); 210 | 211 | private: 212 | std::vector shape_; 213 | std::vector strides_; 214 | size_t bytes_ = 0; 215 | DataHead head_ = DataHead::Init; 216 | DataType dtype_ = DataType::Float; 217 | CUStream stream_ = nullptr; 218 | bool stream_owner_ = false; 219 | int device_id_ = 0; 220 | char shape_string_[100]; 221 | char descriptor_string_[100]; 222 | std::shared_ptr data_; 223 | std::shared_ptr workspace_; 224 | }; 225 | }; 226 | 227 | #endif // TRT_TENSOR_HPP -------------------------------------------------------------------------------- /src/tensorRT/import_lib.cpp: -------------------------------------------------------------------------------- 1 |  2 | #if defined(_WIN32) 3 | # define U_OS_WINDOWS 4 | #else 5 | # define U_OS_LINUX 6 | #endif 7 | 8 | #ifdef U_OS_WINDOWS 9 | #if defined(_DEBUG) 10 | # pragma comment(lib, "opencv_world346d.lib") 11 | #else 12 | # pragma comment(lib, "opencv_world346.lib") 13 | #endif 14 | 15 | //导入cuda 16 | #pragma comment(lib, "cuda.lib") 17 | #pragma comment(lib, "cudart.lib") 18 | #pragma comment(lib, "cublas.lib") 19 | #pragma comment(lib, "cudnn.lib") 20 | 21 | //导入tensorRT 22 | #pragma comment(lib, "nvinfer.lib") 23 | #pragma comment(lib, "nvinfer_plugin.lib") 24 | //#pragma comment(lib, "nvparsers.lib") 25 | 26 | #if defined(_DEBUG) 27 | #pragma comment(lib, "libprotobufd.lib") 28 | #else 29 | #pragma comment(lib, "libprotobuf.lib") 30 | #endif 31 | 32 | #ifdef HAS_PYTHON 33 | #pragma comment(lib, "python37.lib") 34 | #endif 35 | 36 | #endif // U_OS_WINDOWS -------------------------------------------------------------------------------- /src/tensorRT/infer/trt_infer.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_INFER_HPP 4 | #define TRT_INFER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace TRT { 13 | 14 | class Infer { 15 | public: 16 | virtual void forward(bool sync = true) = 0; 17 | virtual int get_max_batch_size() = 0; 18 | virtual void set_stream(CUStream stream) = 0; 19 | virtual CUStream get_stream() = 0; 20 | virtual void synchronize() = 0; 21 | virtual size_t get_device_memory_size() = 0; 22 | virtual std::shared_ptr get_workspace() = 0; 23 | virtual std::shared_ptr input (int index = 0) = 0; 24 | virtual std::shared_ptr output(int index = 0) = 0; 25 | virtual std::shared_ptr tensor(const std::string& name) = 0; 26 | virtual std::string get_input_name (int index = 0) = 0; 27 | virtual std::string get_output_name(int index = 0) = 0; 28 | virtual bool is_output_name(const std::string& name) = 0; 29 | virtual bool is_input_name (const std::string& name) = 0; 30 | virtual int num_output() = 0; 31 | virtual int num_input() = 0; 32 | virtual void print() = 0; 33 | virtual int device() = 0; 34 | virtual void set_input (int index, std::shared_ptr tensor) = 0; 35 | virtual void set_output(int 
index, std::shared_ptr tensor) = 0; 36 | virtual std::shared_ptr> serial_engine() = 0; 37 | }; 38 | 39 | struct DeviceMemorySummary { 40 | size_t total; 41 | size_t available; 42 | }; 43 | 44 | DeviceMemorySummary get_current_device_summary(); 45 | int get_device_count(); 46 | int get_device(); 47 | 48 | void set_device(int device_id); 49 | std::shared_ptr load_infer_from_memory(const void* pdata, size_t size); 50 | std::shared_ptr load_infer(const std::string& file); 51 | bool init_nv_plugins(); 52 | 53 | }; //TRTInfer 54 | 55 | 56 | #endif //TRT_INFER_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnx/onnx_pb.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) ONNX Project Contributors. 2 | // Licensed under the MIT license. 3 | 4 | #ifndef ONNX_ONNX_PB_H 5 | #define ONNX_ONNX_PB_H 6 | 7 | // Defines ONNX_EXPORT and ONNX_IMPORT. On Windows, this corresponds to 8 | // different declarations (dllexport and dllimport). On Linux/Mac, it just 9 | // resolves to the same "default visibility" setting. 10 | #if defined(_MSC_VER) 11 | #if defined(ONNX_BUILD_SHARED_LIBS) || defined(ONNX_BUILD_MAIN_LIB) 12 | #define ONNX_EXPORT __declspec(dllexport) 13 | #define ONNX_IMPORT __declspec(dllimport) 14 | #else 15 | #define ONNX_EXPORT 16 | #define ONNX_IMPORT 17 | #endif 18 | #else 19 | #if defined(__GNUC__) 20 | #define ONNX_EXPORT __attribute__((__visibility__("default"))) 21 | #else 22 | #define ONNX_EXPORT 23 | #endif 24 | #define ONNX_IMPORT ONNX_EXPORT 25 | #endif 26 | 27 | // ONNX_API is a macro that, depends on whether you are building the 28 | // main ONNX library or not, resolves to either ONNX_EXPORT or 29 | // ONNX_IMPORT. 30 | // 31 | // This is used in e.g. ONNX's protobuf files: when building the main library, 32 | // it is defined as ONNX_EXPORT to fix a Windows global-variable-in-dll 33 | // issue, and for anyone dependent on ONNX it will be defined as 34 | // ONNX_IMPORT. ONNX_BUILD_MAIN_LIB can also be set when being built 35 | // statically if ONNX is being linked into a shared library that wants 36 | // to export the ONNX APIs and classes. 37 | // 38 | // More details on Windows dllimport / dllexport can be found at 39 | // https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx 40 | // 41 | // This solution is similar to 42 | // https://github.com/pytorch/pytorch/blob/master/caffe2/core/common.h 43 | #define ONNX_API 44 | #include "onnx-ml.pb.h" 45 | 46 | #endif // ! 
ONNX_ONNX_PB_H 47 | -------------------------------------------------------------------------------- /src/tensorRT/onnx/readme.md: -------------------------------------------------------------------------------- 1 | # ONNX 2 | - 这几个文件来自于对ONNX的编译后提取的结果,由protoc生成的cpp 3 | - https://github.com/onnx/onnx -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ImporterContext.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt.hpp" 8 | #include "onnx2trt_utils.hpp" 9 | #include "onnxErrorRecorder.hpp" 10 | // #include "onnx/common/stl_backports.h" 11 | #include 12 | #include 13 | 14 | namespace onnx2trt 15 | { 16 | 17 | class ErrorRecorderWrapper 18 | { 19 | public: 20 | ErrorRecorderWrapper(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) 21 | : mNetwork(network) 22 | , mLogger(logger) 23 | { 24 | if (mNetwork) 25 | { 26 | mUserErrorRecorder = mNetwork->getErrorRecorder(); 27 | mOnnxErrorRecorder = ONNXParserErrorRecorder::create(logger, mUserErrorRecorder); 28 | if (mOnnxErrorRecorder) 29 | { 30 | if (mUserErrorRecorder) 31 | { 32 | mUserErrorRecorder->incRefCount(); 33 | } 34 | mNetwork->setErrorRecorder(mOnnxErrorRecorder); 35 | } 36 | } 37 | } 38 | 39 | ~ErrorRecorderWrapper() 40 | { 41 | if (mNetwork && mOnnxErrorRecorder) 42 | { 43 | mNetwork->setErrorRecorder(mUserErrorRecorder); 44 | if (mUserErrorRecorder) 45 | { 46 | mUserErrorRecorder->decRefCount(); 47 | } 48 | ONNXParserErrorRecorder::destroy(mOnnxErrorRecorder); 49 | } 50 | } 51 | 52 | bool hasError() const 53 | { 54 | return mOnnxErrorRecorder != nullptr && mOnnxErrorRecorder->getNbErrors() != 0; 55 | } 56 | 57 | //! Return recorder used by hasError(). 58 | nvinfer1::IErrorRecorder* getErrorRecorder() const 59 | { 60 | return mOnnxErrorRecorder ? mOnnxErrorRecorder : nullptr; 61 | } 62 | private: 63 | nvinfer1::INetworkDefinition* mNetwork{nullptr}; 64 | nvinfer1::ILogger* mLogger{nullptr}; 65 | ONNXParserErrorRecorder* mOnnxErrorRecorder{nullptr}; 66 | nvinfer1::IErrorRecorder* mUserErrorRecorder{nullptr}; 67 | }; 68 | 69 | class ImporterContext final : public IImporterContext 70 | { 71 | nvinfer1::INetworkDefinition* mNetwork; 72 | nvinfer1::ILogger* mLogger; 73 | std::list> mTempBufs; 74 | StringMap mUserInputs; 75 | StringMap mUserOutputs; 76 | StringMap mOpsets; 77 | StringMap mTensors; // All tensors in the graph mapped to their names. 78 | StringMap mTensorLocations; 79 | StringMap mTensorRangeMins; 80 | StringMap mTensorRangeMaxes; 81 | StringMap mLayerPrecisions; 82 | std::set mTensorNames; // Keep track of how many times a tensor name shows up, to avoid duplicate naming in TRT. 83 | std::set mLayerNames; // Keep track of how many times a tensor name shows up, to avoid duplicate naming in TRT. 84 | int64_t mSuffixCounter{0}; // increasing suffix counter used to uniquify layer names. 85 | std::unordered_set mUnsupportedShapeTensors; // Container to hold output tensor names of layers that produce shape tensor outputs but do not natively support them. 86 | StringMap mLoopTensors; // Container to map subgraph tensors to their original outer graph names. 
87 | std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file 88 | std::unique_ptr mErrorWrapper; // error recorder to control TRT errors 89 | 90 | public: 91 | ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) 92 | : mNetwork(network) 93 | , mLogger(logger) 94 | // Disable ErrorRecorder for now due to incompatibilities with ONNXRT. 95 | // , mErrorWrapper(onnx::make_unique(mNetwork, logger)) 96 | , mErrorWrapper(nullptr) 97 | { 98 | } 99 | nvinfer1::INetworkDefinition* network() override 100 | { 101 | return mNetwork; 102 | } 103 | StringMap& tensors() override 104 | { 105 | return mTensors; 106 | } 107 | StringMap& tensorLocations() override 108 | { 109 | return mTensorLocations; 110 | } 111 | StringMap& tensorRangeMins() override 112 | { 113 | return mTensorRangeMins; 114 | } 115 | StringMap& tensorRangeMaxes() override 116 | { 117 | return mTensorRangeMaxes; 118 | } 119 | StringMap& layerPrecisions() override 120 | { 121 | return mLayerPrecisions; 122 | } 123 | std::unordered_set& unsupportedShapeTensors() override 124 | { 125 | return mUnsupportedShapeTensors; 126 | } 127 | StringMap& loopTensors() override 128 | { 129 | return mLoopTensors; 130 | } 131 | void setOnnxFileLocation(std::string location) override 132 | { 133 | mOnnxFileLocation = location; 134 | } 135 | std::string getOnnxFileLocation() override 136 | { 137 | return mOnnxFileLocation; 138 | } 139 | // This actually handles weights as well, but is named this way to be consistent with the tensors() 140 | void registerTensor(TensorOrWeights tensor, const std::string& basename) override 141 | { 142 | // TRT requires unique tensor names. 143 | const std::string uniqueName = generateUniqueName(mTensorNames, basename); 144 | 145 | if (tensor) 146 | { 147 | auto* ctx = this; // To enable logging. 148 | if (tensor.is_tensor()) 149 | { 150 | tensor.tensor().setName(uniqueName.c_str()); 151 | 152 | LOG_VERBOSE("Registering tensor: " << uniqueName << " for ONNX tensor: " << basename); 153 | } 154 | else if (tensor.is_weights()) 155 | { 156 | const auto& weights = tensor.weights(); 157 | if (tensor.weights().type == ::onnx::TensorProto::INT64) 158 | { 159 | tensor = ShapedWeights{::onnx::TensorProto::INT32, 160 | convertINT64(reinterpret_cast(weights.values), weights.shape, ctx), weights.shape}; 161 | } 162 | tensor.weights().setName(basename.c_str()); 163 | } 164 | 165 | } 166 | // Overwrite previous tensors registered with the same name (this only happens when there are subgraphs, 167 | // and in that case, overwriting is the desired behavior). 168 | this->tensors()[basename] = std::move(tensor); 169 | } 170 | 171 | void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) override 172 | { 173 | // No layer will be added for Constant nodes in ONNX. 174 | if (layer) 175 | { 176 | const std::string name = basename.empty() ? layer->getName() : basename; 177 | const std::string uniqueName = generateUniqueName(mLayerNames, name); 178 | 179 | auto* ctx = this; // To enable logging. 180 | LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename); 181 | 182 | layer->setName(uniqueName.c_str()); 183 | } 184 | } 185 | 186 | nvinfer1::ILogger& logger() override 187 | { 188 | return *mLogger; 189 | } 190 | 191 | ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) override 192 | { 193 | ShapedWeights weights(type, nullptr, shape); 194 | // Need special logic for handling scalars. 
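        // For a 0-D (scalar) shape, allocate exactly one element of the requested dtype
        // instead of relying on weights.size_bytes().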
195 | if (shape.nbDims == 0) 196 | { 197 | mTempBufs.push_back(std::vector(getDtypeSize(type), value)); 198 | } 199 | else 200 | { 201 | mTempBufs.push_back(std::vector(weights.size_bytes(), value)); 202 | } 203 | weights.values = mTempBufs.back().data(); 204 | return weights; 205 | } 206 | 207 | bool setUserInput(const char* name, nvinfer1::ITensor* input) 208 | { 209 | mUserInputs[name] = input; 210 | return true; 211 | } 212 | bool setUserOutput(const char* name, nvinfer1::ITensor** output) 213 | { 214 | mUserOutputs[name] = output; 215 | return true; 216 | } 217 | nvinfer1::ITensor* getUserInput(const char* name) 218 | { 219 | if (!mUserInputs.count(name)) 220 | { 221 | return nullptr; 222 | } 223 | else 224 | { 225 | return mUserInputs.at(name); 226 | } 227 | } 228 | nvinfer1::ITensor** getUserOutput(const char* name) 229 | { 230 | if (!mUserOutputs.count(name)) 231 | { 232 | return nullptr; 233 | } 234 | else 235 | { 236 | return mUserOutputs.at(name); 237 | } 238 | } 239 | StringMap const& getUserOutputs() const 240 | { 241 | return mUserOutputs; 242 | } 243 | void clearOpsets() 244 | { 245 | mOpsets.clear(); 246 | } 247 | void addOpset(std::string domain, int64_t version) 248 | { 249 | mOpsets.emplace(domain, version); 250 | } 251 | int64_t getOpsetVersion(const char* domain = "") const override 252 | { 253 | if (mOpsets.empty()) 254 | { 255 | return 1; 256 | } 257 | else if (mOpsets.size() == 1) 258 | { 259 | return mOpsets.begin()->second; 260 | } 261 | else 262 | { 263 | assert(mOpsets.count(domain)); 264 | return mOpsets.at(domain); 265 | } 266 | } 267 | bool hasError() const noexcept override 268 | { 269 | return mErrorWrapper != nullptr && mErrorWrapper->hasError(); 270 | } 271 | 272 | nvinfer1::IErrorRecorder* getErrorRecorder() const noexcept override 273 | { 274 | return mErrorWrapper ? 
mErrorWrapper->getErrorRecorder() : nullptr; 275 | } 276 | private: 277 | std::string generateUniqueName(std::set& namesSet, const std::string& basename) 278 | { 279 | std::string candidate = basename; 280 | 281 | while (namesSet.find(candidate) != namesSet.end()) 282 | { 283 | candidate = basename + "_" + std::to_string(mSuffixCounter); 284 | ++mSuffixCounter; 285 | } 286 | 287 | namesSet.insert(candidate); 288 | 289 | return candidate; 290 | } 291 | }; 292 | 293 | } // namespace onnx2trt 294 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "LoopHelpers.hpp" 6 | #include "onnx2trt_utils.hpp" 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial) 12 | { 13 | nvinfer1::ITensor* initialTensor = addConstantScalar(ctx, initial, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 14 | nvinfer1::ITensor* one = addConstantScalar(ctx, 1, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 15 | 16 | auto counter = loop->addRecurrence(*initialTensor); 17 | nvinfer1::ITensor* addOne = ctx->network()->addElementWise(*counter->getOutput(0), *one, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0); 18 | counter->setInput(1, *addOne); 19 | return counter->getOutput(0); 20 | } 21 | 22 | } // namespace onnx2trt 23 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "ImporterContext.hpp" 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial = 0); 15 | 16 | } // namespace onnx2trt 17 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ModelImporter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "ImporterContext.hpp" 8 | #include "NvInferPlugin.h" 9 | #include "NvOnnxParser.h" 10 | #include "builtin_op_importers.hpp" 11 | #include "utils.hpp" 12 | 13 | namespace onnx2trt 14 | { 15 | 16 | Status parseGraph(IImporterContext* ctx, const ::onnx::GraphProto& graph, bool deserializingINetwork = false, int* currentNode = nullptr); 17 | 18 | class ModelImporter : public nvonnxparser::IParser 19 | { 20 | protected: 21 | string_map _op_importers; 22 | virtual Status importModel(::onnx::ModelProto const& model); 23 | 24 | private: 25 | ImporterContext _importer_ctx; 26 | std::list<::onnx::ModelProto> _onnx_models; // Needed for ownership of weights 27 | int _current_node; 28 | std::vector _errors; 29 | std::vector _input_dims; 30 | 31 | public: 32 | ModelImporter(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger, const std::vector& input_dims) 33 | : _op_importers(getBuiltinOpImporterMap()) 34 | , _importer_ctx(network, logger) 35 | , _input_dims(input_dims) 36 | { 37 | } 38 | bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) override; 39 | bool parse(void const* 
serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) override; 40 | bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, 41 | SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) override; 42 | 43 | bool supportsOperator(const char* op_name) const override; 44 | void destroy() override 45 | { 46 | delete this; 47 | } 48 | // virtual void registerOpImporter(std::string op, 49 | // NodeImporter const &node_importer) override { 50 | // // Note: This allows existing importers to be replaced 51 | // _op_importers[op] = node_importer; 52 | //} 53 | // virtual Status const &setInput(const char *name, 54 | // nvinfer1::ITensor *input) override; 55 | // virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override; 56 | int getNbErrors() const override 57 | { 58 | return _errors.size(); 59 | } 60 | nvonnxparser::IParserError const* getError(int index) const override 61 | { 62 | assert(0 <= index && index < (int) _errors.size()); 63 | return &_errors[index]; 64 | } 65 | void clearErrors() override 66 | { 67 | _errors.clear(); 68 | } 69 | 70 | //...LG: Move the implementation to .cpp 71 | bool parseFromFile(const char* onnxModelFile, int verbosity) override; 72 | bool parseFromData(const void* onnx_data, size_t size, int verbosity) override; 73 | }; 74 | 75 | } // namespace onnx2trt 76 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/NvOnnxParser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "NvOnnxParser.h" 6 | #include "ModelImporter.hpp" 7 | 8 | extern "C" void* createNvOnnxParser_INTERNAL(void* network_, void* logger_, int version, const std::vector& input_dims) 9 | { 10 | auto network = static_cast(network_); 11 | auto logger = static_cast(logger_); 12 | return new onnx2trt::ModelImporter(network, logger, input_dims); 13 | } 14 | 15 | extern "C" int getNvOnnxParserVersion() 16 | { 17 | return NV_ONNX_PARSER_VERSION; 18 | } -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/NvOnnxParser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #ifndef NV_ONNX_PARSER_H 6 | #define NV_ONNX_PARSER_H 7 | 8 | #include "NvInfer.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | //! 15 | //! \file NvOnnxParser.h 16 | //! 17 | //! This is the API for the ONNX Parser 18 | //! 19 | 20 | #define NV_ONNX_PARSER_MAJOR 0 21 | #define NV_ONNX_PARSER_MINOR 1 22 | #define NV_ONNX_PARSER_PATCH 0 23 | 24 | static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); 25 | 26 | //! \typedef SubGraph_t 27 | //! 28 | //! \brief The data structure containing the parsing capability of 29 | //! a set of nodes in an ONNX graph. 30 | //! 31 | typedef std::pair, bool> SubGraph_t; 32 | 33 | //! \typedef SubGraphCollection_t 34 | //! 35 | //! \brief The data structure containing all SubGraph_t partitioned 36 | //! out of an ONNX graph. 37 | //! 38 | typedef std::vector SubGraphCollection_t; 39 | 40 | //! 41 | //! \namespace nvonnxparser 42 | //! 43 | //! \brief The TensorRT ONNX parser API namespace 44 | //! 
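//
// Minimal usage sketch (illustrative only; the network and logger are assumed to have
// been created elsewhere with the usual TensorRT builder API):
//
//   auto* parser = nvonnxparser::createParser(*network, logger);
//   if (!parser->parseFromFile("model.onnx", /*verbosity=*/1))
//   {
//       for (int i = 0; i < parser->getNbErrors(); ++i)
//           printf("%s\n", parser->getError(i)->desc());
//   }
//   parser->destroy(); // deprecated, but still the cleanup path declared below
//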
45 | namespace nvonnxparser 46 | { 47 | 48 | template 49 | inline int32_t EnumMax(); 50 | 51 | /** \enum ErrorCode 52 | * 53 | * \brief the type of parser error 54 | */ 55 | enum class ErrorCode : int 56 | { 57 | kSUCCESS = 0, 58 | kINTERNAL_ERROR = 1, 59 | kMEM_ALLOC_FAILED = 2, 60 | kMODEL_DESERIALIZE_FAILED = 3, 61 | kINVALID_VALUE = 4, 62 | kINVALID_GRAPH = 5, 63 | kINVALID_NODE = 6, 64 | kUNSUPPORTED_GRAPH = 7, 65 | kUNSUPPORTED_NODE = 8 66 | }; 67 | 68 | template <> 69 | inline int32_t EnumMax() 70 | { 71 | return 9; 72 | } 73 | 74 | /** \class IParserError 75 | * 76 | * \brief an object containing information about an error 77 | */ 78 | class IParserError 79 | { 80 | public: 81 | /** \brief the error code 82 | */ 83 | virtual ErrorCode code() const = 0; 84 | /** \brief description of the error 85 | */ 86 | virtual const char* desc() const = 0; 87 | /** \brief source file in which the error occurred 88 | */ 89 | virtual const char* file() const = 0; 90 | /** \brief source line at which the error occurred 91 | */ 92 | virtual int line() const = 0; 93 | /** \brief source function in which the error occurred 94 | */ 95 | virtual const char* func() const = 0; 96 | /** \brief index of the ONNX model node in which the error occurred 97 | */ 98 | virtual int node() const = 0; 99 | 100 | protected: 101 | virtual ~IParserError() {} 102 | }; 103 | 104 | /** \class IParser 105 | * 106 | * \brief an object for parsing ONNX models into a TensorRT network definition 107 | */ 108 | class IParser 109 | { 110 | public: 111 | /** \brief Parse a serialized ONNX model into the TensorRT network. 112 | * This method has very limited diagnostics. If parsing the serialized model 113 | * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) 114 | * it the user responsibility to intercept and report the error. 115 | * To obtain a better diagnostic, use the parseFromFile method below. 116 | * 117 | * \param serialized_onnx_model Pointer to the serialized ONNX model 118 | * \param serialized_onnx_model_size Size of the serialized ONNX model 119 | * in bytes 120 | * \param model_path Absolute path to the model file for loading external weights if required 121 | * \return true if the model was parsed successfully 122 | * \see getNbErrors() getError() 123 | */ 124 | virtual bool parse(void const* serialized_onnx_model, 125 | size_t serialized_onnx_model_size, 126 | const char* model_path = nullptr) 127 | = 0; 128 | 129 | /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model 130 | * calls parse method inside. 
131 | * 132 | * \param File name 133 | * \param Verbosity Level 134 | * 135 | * \return true if the model was parsed successfully 136 | * 137 | */ 138 | virtual bool parseFromFile(const char* onnxModelFile, int verbosity) = 0; 139 | virtual bool parseFromData(const void* onnx_data, size_t size, int verbosity) = 0; 140 | 141 | /** \brief Check whether TensorRT supports a particular ONNX model 142 | * 143 | * \param serialized_onnx_model Pointer to the serialized ONNX model 144 | * \param serialized_onnx_model_size Size of the serialized ONNX model 145 | * in bytes 146 | * \param sub_graph_collection Container to hold supported subgraphs 147 | * \param model_path Absolute path to the model file for loading external weights if required 148 | * \return true if the model is supported 149 | */ 150 | virtual bool supportsModel(void const* serialized_onnx_model, 151 | size_t serialized_onnx_model_size, 152 | SubGraphCollection_t& sub_graph_collection, 153 | const char* model_path = nullptr) 154 | = 0; 155 | 156 | /** \brief Parse a serialized ONNX model into the TensorRT network 157 | * with consideration of user provided weights 158 | * 159 | * \param serialized_onnx_model Pointer to the serialized ONNX model 160 | * \param serialized_onnx_model_size Size of the serialized ONNX model 161 | * in bytes 162 | * \return true if the model was parsed successfully 163 | * \see getNbErrors() getError() 164 | */ 165 | virtual bool parseWithWeightDescriptors( 166 | void const* serialized_onnx_model, size_t serialized_onnx_model_size) 167 | = 0; 168 | 169 | /** \brief Returns whether the specified operator may be supported by the 170 | * parser. 171 | * 172 | * Note that a result of true does not guarantee that the operator will be 173 | * supported in all cases (i.e., this function may return false-positives). 
174 | * 175 | * \param op_name The name of the ONNX operator to check for support 176 | */ 177 | virtual bool supportsOperator(const char* op_name) const = 0; 178 | /** \brief destroy this object 179 | * 180 | * \warning deprecated and planned on being removed in TensorRT 10.0 181 | */ 182 | TRT_DEPRECATED virtual void destroy() = 0; 183 | /** \brief Get the number of errors that occurred during prior calls to 184 | * \p parse 185 | * 186 | * \see getError() clearErrors() IParserError 187 | */ 188 | virtual int getNbErrors() const = 0; 189 | /** \brief Get an error that occurred during prior calls to \p parse 190 | * 191 | * \see getNbErrors() clearErrors() IParserError 192 | */ 193 | virtual IParserError const* getError(int index) const = 0; 194 | /** \brief Clear errors from prior calls to \p parse 195 | * 196 | * \see getNbErrors() getError() IParserError 197 | */ 198 | virtual void clearErrors() = 0; 199 | 200 | virtual ~IParser() noexcept = default; 201 | }; 202 | 203 | } // namespace nvonnxparser 204 | 205 | extern "C" TENSORRTAPI void* createNvOnnxParser_INTERNAL(void* network, void* logger, int version, const std::vector& input_dims); 206 | extern "C" TENSORRTAPI int getNvOnnxParserVersion(); 207 | extern "C" TENSORRTAPI void register_layerhook_reshape(const std::function(const std::string& name, const std::vector& shape)>&); 208 | 209 | namespace nvonnxparser 210 | { 211 | 212 | namespace 213 | { 214 | 215 | /** \brief Create a new parser object 216 | * 217 | * \param network The network definition that the parser will write to 218 | * \param logger The logger to use 219 | * \return a new parser object or NULL if an error occurred 220 | * 221 | * Any input dimensions that are constant should not be changed after parsing, 222 | * because correctness of the translation may rely on those constants. 223 | * Changing a dynamic input dimension, i.e. one that translates to -1 in 224 | * TensorRT, to a constant is okay if the constant is consistent with the model. 
225 | * 226 | * \see IParser 227 | */ 228 | inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger, const std::vector& input_dims={}) 229 | { 230 | return static_cast(createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION, input_dims)); 231 | } 232 | 233 | } // namespace 234 | 235 | } // namespace nvonnxparser 236 | 237 | #endif // NV_ONNX_PARSER_H 238 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/OnnxAttrs.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "OnnxAttrs.hpp" 6 | #include "ShapedWeights.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | 10 | template <> 11 | float OnnxAttrs::get(const std::string& key) const 12 | { 13 | return this->at(key)->f(); 14 | } 15 | 16 | template <> 17 | int OnnxAttrs::get(const std::string& key) const 18 | { 19 | return this->at(key)->i(); 20 | } 21 | 22 | template <> 23 | bool OnnxAttrs::get(const std::string& key) const 24 | { 25 | int value = this->at(key)->i(); 26 | assert(value == bool(value)); 27 | return bool(value); 28 | } 29 | 30 | template <> 31 | std::string OnnxAttrs::get(const std::string& key) const 32 | { 33 | return this->at(key)->s(); 34 | } 35 | 36 | template <> 37 | std::vector OnnxAttrs::get>(const std::string& key) const 38 | { 39 | auto attr = this->at(key)->ints(); 40 | return std::vector(attr.begin(), attr.end()); 41 | } 42 | 43 | template <> 44 | std::vector OnnxAttrs::get>(const std::string& key) const 45 | { 46 | auto attr = this->at(key)->ints(); 47 | return std::vector(attr.begin(), attr.end()); 48 | } 49 | 50 | template <> 51 | std::vector OnnxAttrs::get>(const std::string& key) const 52 | { 53 | auto attr = this->at(key)->floats(); 54 | return std::vector(attr.begin(), attr.end()); 55 | } 56 | 57 | template <> 58 | nvinfer1::Dims OnnxAttrs::get(const std::string& key) const 59 | { 60 | auto values = this->get>(key); 61 | nvinfer1::Dims dims; 62 | dims.nbDims = values.size(); 63 | std::copy(values.begin(), values.end(), dims.d); 64 | // Note: No dimension type information is included 65 | return dims; 66 | } 67 | 68 | template <> 69 | nvinfer1::DimsHW OnnxAttrs::get(const std::string& key) const 70 | { 71 | nvinfer1::Dims dims = this->get(key); 72 | assert(dims.nbDims == 2); 73 | return nvinfer1::DimsHW(dims.d[0], dims.d[1]); 74 | } 75 | 76 | template <> 77 | nvinfer1::Permutation OnnxAttrs::get(const std::string& key) const 78 | { 79 | auto values = this->get>(key); 80 | nvinfer1::Permutation perm; 81 | std::copy(values.begin(), values.end(), perm.order); 82 | // Fill unused values with identity permutation 83 | for (int i = values.size(); i < nvinfer1::Dims::MAX_DIMS; ++i) 84 | { 85 | perm.order[i] = i; 86 | } 87 | return perm; 88 | } 89 | 90 | template <> 91 | onnx2trt::ShapedWeights OnnxAttrs::get(const std::string& key) const 92 | { 93 | ::onnx::TensorProto const& onnx_weights_tensor = this->at(key)->t(); 94 | onnx2trt::ShapedWeights weights; 95 | bool success = convertOnnxWeights(onnx_weights_tensor, &weights, mCtx); 96 | if (!success) 97 | { 98 | throw std::runtime_error{"Unable to convert ONNX weights"}; 99 | } 100 | return weights; 101 | } 102 | 103 | template <> 104 | nvinfer1::DataType OnnxAttrs::get(const std::string& key) const 105 | { 106 | ::onnx::TensorProto::DataType onnx_dtype 107 | = static_cast<::onnx::TensorProto::DataType>(this->at(key)->i()); 108 | nvinfer1::DataType 
dtype{}; 109 | if (!onnx2trt::convertDtype(onnx_dtype, &dtype)) 110 | { 111 | dtype = static_cast(-1); 112 | } 113 | return dtype; 114 | } 115 | 116 | template <> 117 | std::vector OnnxAttrs::get>(const std::string& key) const 118 | { 119 | auto attr = this->at(key)->ints(); 120 | auto onnx_dtypes = std::vector(attr.begin(), attr.end()); 121 | std::vector dtypes{}; 122 | for (auto onnx_dtype : onnx_dtypes) 123 | { 124 | nvinfer1::DataType dtype{}; 125 | if (!onnx2trt::convertDtype(static_cast(onnx_dtype), &dtype)) 126 | { 127 | dtype = static_cast(-1); 128 | } 129 | dtypes.push_back(dtype); 130 | } 131 | return dtypes; 132 | } 133 | 134 | inline nvinfer1::ActivationType activationStringToEnum(const std::string& type) 135 | { 136 | if (type == "Relu") 137 | { 138 | return nvinfer1::ActivationType::kRELU; 139 | } 140 | if (type == "Tanh") 141 | { 142 | return nvinfer1::ActivationType::kTANH; 143 | } 144 | if (type == "Sigmoid") 145 | { 146 | return nvinfer1::ActivationType::kSIGMOID; 147 | } 148 | if (type == "LeakyRelu") 149 | { 150 | return nvinfer1::ActivationType::kLEAKY_RELU; 151 | } 152 | if (type == "ThresholdedRelu") 153 | { 154 | return nvinfer1::ActivationType::kTHRESHOLDED_RELU; 155 | } 156 | if (type == "ScaledTanh") 157 | { 158 | return nvinfer1::ActivationType::kSCALED_TANH; 159 | } 160 | if (type == "HardSigmoid") 161 | { 162 | return nvinfer1::ActivationType::kHARD_SIGMOID; 163 | } 164 | if (type == "Elu") 165 | { 166 | return nvinfer1::ActivationType::kELU; 167 | } 168 | if (type == "Softsign") 169 | { 170 | return nvinfer1::ActivationType::kSOFTSIGN; 171 | } 172 | if (type == "Softplus") 173 | { 174 | return nvinfer1::ActivationType::kSOFTPLUS; 175 | } 176 | throw std::runtime_error("Unknown activation type: " + type); 177 | } 178 | 179 | template <> 180 | nvinfer1::ActivationType OnnxAttrs::get(const std::string& key) const 181 | { 182 | const std::string type = this->get(key); 183 | return activationStringToEnum(type); 184 | } 185 | 186 | template <> 187 | std::vector OnnxAttrs::get>( 188 | const std::string& key) const 189 | { 190 | const auto strings = this->at(key)->strings(); 191 | std::vector actTypes; 192 | for (const auto& str : strings) 193 | { 194 | actTypes.emplace_back(activationStringToEnum(str)); 195 | } 196 | return actTypes; 197 | } 198 | 199 | template <> 200 | const ::onnx::GraphProto& OnnxAttrs::get(const std::string& key) const 201 | { 202 | return this->at(key)->g(); 203 | } 204 | 205 | template <> 206 | nvinfer1::RNNOperation OnnxAttrs::get(const std::string& key) const 207 | { 208 | std::string op = this->get(key); 209 | if (op == std::string("relu")) 210 | { 211 | return nvinfer1::RNNOperation::kRELU; 212 | } 213 | if (op == std::string("tanh")) 214 | { 215 | return nvinfer1::RNNOperation::kTANH; 216 | } 217 | if (op == std::string("lstm")) 218 | { 219 | return nvinfer1::RNNOperation::kLSTM; 220 | } 221 | if (op == std::string("gru")) 222 | { 223 | return nvinfer1::RNNOperation::kGRU; 224 | } 225 | throw std::runtime_error("Unknown RNNOperation: " + op); 226 | } 227 | 228 | template <> 229 | nvinfer1::RNNInputMode OnnxAttrs::get(const std::string& key) const 230 | { 231 | std::string mode = this->get(key); 232 | if (mode == std::string("skip")) 233 | { 234 | return nvinfer1::RNNInputMode::kSKIP; 235 | } 236 | if (mode == std::string("linear")) 237 | { 238 | return nvinfer1::RNNInputMode::kLINEAR; 239 | } 240 | throw std::runtime_error("Unknown RNNInputMode: " + mode); 241 | } 242 | 243 | template <> 244 | nvinfer1::RNNDirection OnnxAttrs::get(const 
std::string& key) const 245 | { 246 | std::string direction = this->get(key); 247 | if (direction == std::string("unidirection")) 248 | { 249 | return nvinfer1::RNNDirection::kUNIDIRECTION; 250 | } 251 | if (direction == std::string("bidirection")) 252 | { 253 | return nvinfer1::RNNDirection::kBIDIRECTION; 254 | } 255 | throw std::runtime_error("Unknown RNNDirection: " + direction); 256 | } 257 | 258 | template <> 259 | std::vector OnnxAttrs::get>(const std::string& key) const 260 | { 261 | auto attr = this->at(key)->strings(); 262 | return std::vector(attr.begin(), attr.end()); 263 | } 264 | 265 | template <> 266 | nvinfer1::ScaleMode OnnxAttrs::get(const std::string& key) const 267 | { 268 | std::string s = this->get(key); 269 | if (s == "uniform") 270 | { 271 | return nvinfer1::ScaleMode::kUNIFORM; 272 | } 273 | if (s == "channel") 274 | { 275 | return nvinfer1::ScaleMode::kCHANNEL; 276 | } 277 | if (s == "elementwise") 278 | { 279 | return nvinfer1::ScaleMode::kELEMENTWISE; 280 | } 281 | throw std::runtime_error("Unknown ScaleMode: " + s); 282 | } 283 | 284 | template <> 285 | nvinfer1::MatrixOperation OnnxAttrs::get(const std::string& key) const 286 | { 287 | std::string s = this->get(key); 288 | if (s == "none") 289 | { 290 | return nvinfer1::MatrixOperation::kNONE; 291 | } 292 | if (s == "transpose") 293 | { 294 | return nvinfer1::MatrixOperation::kTRANSPOSE; 295 | } 296 | if (s == "vector") 297 | { 298 | return nvinfer1::MatrixOperation::kVECTOR; 299 | } 300 | throw std::runtime_error("Unknown MatrixOperation: " + s); 301 | } 302 | 303 | template <> 304 | nvinfer1::ResizeMode OnnxAttrs::get(const std::string& key) const 305 | { 306 | const auto& mode = this->get(key); 307 | if (mode == "nearest") 308 | { 309 | return nvinfer1::ResizeMode::kNEAREST; 310 | } 311 | if (mode == "linear") 312 | { 313 | return nvinfer1::ResizeMode::kLINEAR; 314 | } 315 | throw std::runtime_error("Unknown ResizeMode: " + mode); 316 | } 317 | 318 | template <> 319 | nvinfer1::ResizeCoordinateTransformation OnnxAttrs::get( 320 | const std::string& key) const 321 | { 322 | const auto& transformation = this->get(key); 323 | if (transformation == "align_corners") 324 | { 325 | return nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS; 326 | } 327 | if (transformation == "asymmetric") 328 | { 329 | return nvinfer1::ResizeCoordinateTransformation::kASYMMETRIC; 330 | } 331 | if (transformation == "half_pixel") 332 | { 333 | return nvinfer1::ResizeCoordinateTransformation::kHALF_PIXEL; 334 | } 335 | throw std::runtime_error("Unknown ResizeCoordinateTransformation: " + transformation); 336 | } 337 | 338 | template <> 339 | nvinfer1::ResizeSelector OnnxAttrs::get(const std::string& key) const 340 | { 341 | const auto& selector = this->get(key); 342 | if (selector == "formula") 343 | { 344 | return nvinfer1::ResizeSelector::kFORMULA; 345 | } 346 | if (selector == "upper") 347 | { 348 | return nvinfer1::ResizeSelector::kUPPER; 349 | } 350 | throw std::runtime_error("Unknown ResizeSelector: " + selector); 351 | } 352 | 353 | template <> 354 | nvinfer1::ResizeRoundMode OnnxAttrs::get(const std::string& key) const 355 | { 356 | const auto& roundMode = this->get(key); 357 | if (roundMode == "half_up") 358 | { 359 | return nvinfer1::ResizeRoundMode::kHALF_UP; 360 | } 361 | if (roundMode == "half_down") 362 | { 363 | return nvinfer1::ResizeRoundMode::kHALF_DOWN; 364 | } 365 | if (roundMode == "floor") 366 | { 367 | return nvinfer1::ResizeRoundMode::kFLOOR; 368 | } 369 | if (roundMode == "ceil") 370 | { 371 | return 
nvinfer1::ResizeRoundMode::kCEIL; 372 | } 373 | throw std::runtime_error("Unknown ResizeRoundMode: " + roundMode); 374 | } 375 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/OnnxAttrs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ImporterContext.hpp" 13 | 14 | class OnnxAttrs 15 | { 16 | template 17 | using string_map = std::unordered_map; 18 | typedef string_map<::onnx::AttributeProto const*> AttrMap; 19 | AttrMap _attrs; 20 | onnx2trt::IImporterContext* mCtx; 21 | 22 | public: 23 | explicit OnnxAttrs(::onnx::NodeProto const& onnx_node, onnx2trt::IImporterContext* ctx) 24 | : mCtx{ctx} 25 | { 26 | for (auto const& attr : onnx_node.attribute()) 27 | { 28 | _attrs.insert({attr.name(), &attr}); 29 | } 30 | } 31 | 32 | bool count(const std::string& key) const 33 | { 34 | return _attrs.count(key); 35 | } 36 | 37 | ::onnx::AttributeProto const* at(std::string key) const 38 | { 39 | if (!_attrs.count(key)) 40 | { 41 | throw std::out_of_range("Attribute not found: " + key); 42 | } 43 | return _attrs.at(key); 44 | } 45 | 46 | ::onnx::AttributeProto::AttributeType type(const std::string& key) const 47 | { 48 | return this->at(key)->type(); 49 | } 50 | 51 | 52 | template 53 | T get(const std::string& key) const; 54 | 55 | template 56 | T get(const std::string& key, T const& default_value) const 57 | { 58 | return _attrs.count(key) ? this->get(key) : default_value; 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/RNNHelpers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "RNNHelpers.hpp" 6 | #include "LoopHelpers.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | nvinfer1::ITensor* addRNNInput(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, std::vector& inputs, const std::string& direction) 14 | { 15 | // In the forward/reverse cases, we only use a single iterator. In the bidirectional case, a forward and reverse 16 | // iterator must be concatenated. 
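    // `direction` is matched against "forward", "reverse" and "bidirectional";
    // any other value falls through all branches and the function returns nullptr.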
17 | // Input dimensions: [1, B, E] 18 | nvinfer1::ITensor* iterationInput{nullptr}; 19 | nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); 20 | 21 | const int sequenceLenIndex = 4; 22 | bool isRagged = inputs.size() > sequenceLenIndex && inputs.at(sequenceLenIndex); 23 | 24 | if (direction == "forward") 25 | { 26 | iterationInput = unsqueezeTensor(ctx, node, *loop->addIterator(*input)->getOutput(0), std::vector{0}); 27 | 28 | if (isRagged) 29 | { 30 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 31 | auto maxLen = getAxisLength(ctx, input, 0); 32 | iterationInput = clearMissingSequenceElements(ctx, node, loop, seqLens, iterationInput, maxLen); 33 | } 34 | } 35 | else if (direction == "reverse") 36 | { 37 | nvinfer1::IIteratorLayer* reverseIterator = loop->addIterator(*input); 38 | reverseIterator->setReverse(true); 39 | iterationInput = unsqueezeTensor(ctx, node, *reverseIterator->getOutput(0), std::vector{0}); 40 | if (isRagged) 41 | { 42 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 43 | auto maxLen = getAxisLength(ctx, input, 0); 44 | iterationInput = clearMissingSequenceElements(ctx, node, loop, seqLens, iterationInput, maxLen, true); 45 | } 46 | } 47 | else if (direction == "bidirectional") 48 | { 49 | nvinfer1::IIteratorLayer* forward = loop->addIterator(*input); 50 | nvinfer1::IIteratorLayer* reverse = loop->addIterator(*input); 51 | reverse->setReverse(true); 52 | 53 | auto forwardInput = unsqueezeTensor(ctx, node, *forward->getOutput(0), std::vector{0}); 54 | auto reverseInput = unsqueezeTensor(ctx, node, *reverse->getOutput(0), std::vector{0}); 55 | if (isRagged) 56 | { 57 | nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(sequenceLenIndex), ctx); 58 | auto counter = addLoopCounter(ctx, loop); 59 | auto maxLen = getAxisLength(ctx, input, 0); 60 | forwardInput = clearMissingSequenceElements(ctx, node, loop, seqLens, forwardInput, maxLen, false, counter); 61 | reverseInput = clearMissingSequenceElements(ctx, node, loop, seqLens, reverseInput, maxLen, true, counter); 62 | } 63 | 64 | // Stack on the 0th axis to create a (numDirections, B, E) tensor. 
65 | std::array tensors{{forwardInput, reverseInput}}; 66 | nvinfer1::IConcatenationLayer* concat = ctx->network()->addConcatenation(tensors.data(), 2); 67 | concat->setAxis(0); 68 | iterationInput = concat->getOutput(0); 69 | } 70 | if (iterationInput) 71 | { 72 | LOG_VERBOSE("Input shape: " << iterationInput->getDimensions()); 73 | } 74 | return iterationInput; 75 | } 76 | 77 | nvinfer1::ITensor* clearMissingSequenceElements(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, 78 | nvinfer1::ITensor* seqLens, nvinfer1::ITensor* toMask, nvinfer1::ITensor* maxLen, bool reverse, 79 | nvinfer1::ITensor* counter) 80 | { 81 | nvinfer1::ITensor* zero 82 | = addConstantScalar(ctx, 0.f, ::onnx::TensorProto::FLOAT, nvinfer1::Dims3(1, 1, 1))->getOutput(0); 83 | nvinfer1::ITensor* seqMask = getRaggedMask(ctx, node, loop, seqLens, maxLen, reverse, counter); 84 | return ctx->network()->addSelect(*seqMask, *toMask, *zero)->getOutput(0); 85 | } 86 | 87 | nvinfer1::ITensor* maskRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 88 | nvinfer1::ITensor* prevH, nvinfer1::ITensor* Ht, nvinfer1::ITensor* maxLen, bool reverse, 89 | nvinfer1::ITensor* counter) 90 | { 91 | // maxLen must be provided if reverse is true 92 | // Forwards previous hidden state if invalid 93 | nvinfer1::ITensor* valid = getRaggedMask(ctx, node, loop, seqLens, maxLen, reverse, counter); 94 | return ctx->network()->addSelect(*valid, *Ht, *prevH)->getOutput(0); 95 | } 96 | 97 | nvinfer1::ITensor* maskBidirRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 98 | nvinfer1::ITensor* maxLen, nvinfer1::ITensor* Ht1, nvinfer1::ITensor* Ht, nvinfer1::ITensor* singlePassShape) 99 | { 100 | // Splits hidden state into forward and backward states, masks each accordingly, then concatenates 101 | 102 | nvinfer1::ITensor* forwardStart 103 | = addConstant(ctx, std::vector{0, 0, 0}, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 3}) 104 | ->getOutput(0); 105 | nvinfer1::ITensor* reverseStart 106 | = addConstant(ctx, std::vector{1, 0, 0}, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 3}) 107 | ->getOutput(0); 108 | 109 | nvinfer1::ISliceLayer* HtForwardLayer 110 | = ctx->network()->addSlice(*Ht, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 111 | HtForwardLayer->setInput(1, *forwardStart); 112 | HtForwardLayer->setInput(2, *singlePassShape); 113 | 114 | nvinfer1::ISliceLayer* HtBackwardLayer 115 | = ctx->network()->addSlice(*Ht, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 116 | HtBackwardLayer->setInput(1, *reverseStart); 117 | HtBackwardLayer->setInput(2, *singlePassShape); 118 | 119 | nvinfer1::ISliceLayer* Ht1ForwardLayer 120 | = ctx->network()->addSlice(*Ht1, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 121 | Ht1ForwardLayer->setInput(1, *forwardStart); 122 | Ht1ForwardLayer->setInput(2, *singlePassShape); 123 | 124 | nvinfer1::ISliceLayer* Ht1BackwardLayer 125 | = ctx->network()->addSlice(*Ht1, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}); 126 | Ht1BackwardLayer->setInput(1, *reverseStart); 127 | Ht1BackwardLayer->setInput(2, *singlePassShape); 128 | 129 | auto forwardHt = HtForwardLayer->getOutput(0); 130 | auto backwardHt = HtBackwardLayer->getOutput(0); 131 | auto forwardHt1 = Ht1ForwardLayer->getOutput(0); 132 | auto backwardHt1 = Ht1BackwardLayer->getOutput(0); 
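    // Mask each direction against its own validity window (the reverse pass counts down
    // from maxLen), then restack the two slices into a (numDirections=2, B, H) hidden state.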
133 | 134 | auto counter = addLoopCounter(ctx, loop, 0); 135 | forwardHt = maskRNNHidden(ctx, node, loop, seqLens, forwardHt1, forwardHt, maxLen, false, counter); 136 | backwardHt = maskRNNHidden(ctx, node, loop, seqLens, backwardHt1, backwardHt, maxLen, true, counter); 137 | std::array tensors{{forwardHt, backwardHt}}; 138 | nvinfer1::IConcatenationLayer* concat = ctx->network()->addConcatenation(tensors.data(), 2); 139 | concat->setAxis(0); 140 | return concat->getOutput(0); 141 | } 142 | 143 | nvinfer1::ITensor* getRaggedMask(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, 144 | nvinfer1::ITensor* maxLen, bool reverse, nvinfer1::ITensor* counter) 145 | { 146 | // Returns a bool tensor which is true where the elements are valid (within the sequence) and false when outside the 147 | // sequence. 148 | // maxLen must be provided if reverse is true 149 | assert(!reverse || maxLen); 150 | 151 | if (!counter) 152 | { 153 | counter = addLoopCounter(ctx, loop, 0); 154 | } 155 | 156 | // Create Mask 157 | nvinfer1::ITensor* seqMask; 158 | if (reverse) 159 | { 160 | counter 161 | = ctx->network() 162 | ->addElementWise(*unsqueezeTensor(ctx, node, *maxLen, {0}), *counter, nvinfer1::ElementWiseOperation::kSUB) 163 | ->getOutput(0); 164 | seqMask 165 | = ctx->network()->addElementWise(*seqLens, *counter, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); 166 | seqMask = ctx->network()->addUnary(*seqMask, nvinfer1::UnaryOperation::kNOT)->getOutput(0); 167 | } 168 | else 169 | { 170 | seqMask 171 | = ctx->network()->addElementWise(*counter, *seqLens, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); 172 | } 173 | return unsqueezeTensor(ctx, node, *seqMask, std::vector{0, 2}); 174 | } 175 | 176 | } // namespace onnx2trt 177 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/RNNHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "TensorOrWeights.hpp" 12 | #include "ImporterContext.hpp" 13 | 14 | namespace onnx2trt 15 | { 16 | 17 | nvinfer1::ITensor* addRNNInput(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, std::vector& inputs, const std::string& direction); 18 | 19 | // Zeros out invalid timesteps in toMask. 
maxLen must be provided if reverse is true 20 | nvinfer1::ITensor* clearMissingSequenceElements(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* toMask, nvinfer1::ITensor* maxLen, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 21 | 22 | // Returns a bool tensor which is true during valid timesteps 23 | nvinfer1::ITensor* getRaggedMask(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 24 | 25 | // Selects between prevH and Ht to forward previous hidden state through invalid timesteps 26 | nvinfer1::ITensor* maskRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* prevH, nvinfer1::ITensor* Ht, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 27 | 28 | // Splits a bidirectional hidden state into forward and reverse passes, masks each using maskRNNHidden, then concatenates 29 | nvinfer1::ITensor* maskBidirRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen, nvinfer1::ITensor* Ht1, nvinfer1::ITensor* Ht, nvinfer1::ITensor* singlePassShape); 30 | 31 | } // namespace onnx2trt 32 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapeTensor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "ShapeTensor.hpp" 6 | #include "TensorOrWeights.hpp" 7 | #include "onnx2trt_utils.hpp" 8 | #include 9 | #include 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | //! If true, tolerate bug where scalar constant of type FLOAT is missing its value, 15 | //! and a shape tensor is expected. 16 | static const bool gTolerateTRT_12408 = true; 17 | 18 | ShapeTensor::ShapeTensor(int rank_, std::vector&& values_) 19 | : mDepth(0) 20 | , mAllValuesKnown(true) 21 | , mRank(rank_) 22 | , mSize(values_.size()) 23 | , mValues(std::move(values_)) 24 | { 25 | assert((rank_ == 0 || rank_ == 1) && "shape tensor must have rank 0 or 1"); 26 | assert(rank_ > 0 || mValues.size() == 1); 27 | } 28 | 29 | ShapeTensor::ShapeTensor(IImporterContext* ctx, TensorOrWeights& t) 30 | : mDepth(0) 31 | { 32 | if (t.is_tensor()) 33 | { 34 | *this = ShapeTensor(t.tensor()); 35 | } 36 | else 37 | { 38 | const nvinfer1::Dims d = t.shape(); 39 | const auto& weights = t.weights(); 40 | if (gTolerateTRT_12408 && weights.type == ::onnx::TensorProto::FLOAT && d.nbDims == 0 && weights.count() == 0) 41 | { 42 | LOG_WARNING("Scalar constant of type FLOAT with no value encountered where ONNX specification requires tensor describing a shape. Assuming it's an INT64 empty vector."); 43 | mRank = 1; 44 | mSize = 0; 45 | mAllValuesKnown = true; 46 | return; 47 | } 48 | assert(0 <= d.nbDims); 49 | assert(d.nbDims <= 1 && "shape tensor must be 0D or 1D"); 50 | mRank = d.nbDims; 51 | mSize = d.nbDims == 0 ? 
1 : d.d[0]; 52 | auto status = weightsToVector(weights, &mValues); 53 | if (status.code() != ErrorCode::kSUCCESS) 54 | { 55 | throw std::runtime_error("constant " + t.getName() + " is not a valid shape tensor"); 56 | } 57 | mAllValuesKnown = true; 58 | } 59 | } 60 | 61 | static bool hasAllNonNegativeValues(const std::vector& values) 62 | { 63 | return std::all_of(values.begin(), values.end(), [](int x) { return x >= 0; }); 64 | } 65 | 66 | ShapeTensor::ShapeTensor(nvinfer1::ITensor& t, int depth) 67 | : mDepth(depth) 68 | , mRank(1) 69 | , mTensor(&t) 70 | { 71 | const nvinfer1::Dims dims = t.getDimensions(); 72 | 73 | switch (mDepth) 74 | { 75 | case 0: 76 | assert(t.getType() == nvinfer1::DataType::kINT32); 77 | mRank = dims.nbDims; 78 | if (mRank == 0) 79 | { 80 | mSize = 1; 81 | } 82 | else if (mRank == 1) 83 | { 84 | mSize = dims.d[0]; 85 | } 86 | else 87 | { 88 | assert(mRank == -1); 89 | } 90 | break; 91 | 92 | case 1: 93 | if (dims.nbDims >= 0) 94 | { 95 | mSize = dims.nbDims; 96 | mValues.resize(dims.nbDims); 97 | std::copy_n(dims.d, dims.nbDims, mValues.begin()); 98 | mAllValuesKnown = hasAllNonNegativeValues(mValues); 99 | } 100 | break; 101 | 102 | case 2: 103 | mSize = 1; 104 | if (dims.nbDims >= 0) 105 | { 106 | mValues = {dims.nbDims}; 107 | mAllValuesKnown = hasAllNonNegativeValues(mValues); 108 | } 109 | break; 110 | 111 | case 3: 112 | // Applying IShapeLayer three times always yields a 1D vector containing 1. 113 | mDepth = 0; 114 | mSize = 1; 115 | mValues = {1}; 116 | mAllValuesKnown = true; 117 | mTensor = nullptr; 118 | break; 119 | 120 | default: 121 | // Though depths greater than 3 could be handled the same as 3, they are 122 | // likely a sign of a problem. Depths less than 0 make no sense. 123 | assert(0); 124 | break; 125 | } 126 | } 127 | 128 | ShapeTensor shapeVector(int64_t value) 129 | { 130 | return ShapeTensor(1, std::vector({value})); 131 | } 132 | 133 | ShapeTensor shapeScalar(int64_t value) 134 | { 135 | return ShapeTensor(0, std::vector({value})); 136 | } 137 | 138 | bool ShapeTensor::valueKnown(int k) const 139 | { 140 | assert(0 <= k); 141 | assert(k < mSize); 142 | return allValuesKnown() || (mValues.size() == static_cast(mSize) && mValues[k] >= 0); 143 | } 144 | 145 | bool ShapeTensor::isAll(int64_t x) const 146 | { 147 | assert(mDepth >= 0 && "undefined tensor"); 148 | return allValuesKnown() && std::all_of(begin(), end(), [x](int64_t y) { return x == y; }); 149 | } 150 | 151 | nvinfer1::ITensor& ShapeTensor::tensor(IImporterContext* ctx) const 152 | { 153 | assert(mDepth >= 0 && "undefined tensor"); 154 | assert(mDepth <= 2); 155 | if (!mTensor || mDepth != 0) 156 | { 157 | // Need to create an ITensor representing *this. 
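        // Two paths: when every value is known, materialize an INT32 constant layer;
        // otherwise apply IShapeLayer to the stored tensor mDepth times.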
158 | if (allValuesKnown()) 159 | { 160 | // Create constant 161 | const nvinfer1::Dims dims{rank(), {size()}}; 162 | const nvinfer1::Weights w{nvinfer1::DataType::kINT32, convertINT64(mValues.data(), dims, ctx), size()}; 163 | mTensor = ctx->network()->addConstant(dims, w)->getOutput(0); 164 | mDepth = 0; 165 | } 166 | else 167 | { 168 | assert(mTensor); 169 | for (; mDepth > 0; --mDepth) 170 | { 171 | mTensor = ctx->network()->addShape(*mTensor)->getOutput(0); 172 | } 173 | } 174 | } 175 | return *mTensor; 176 | } 177 | 178 | ShapeTensor iotaShapeVector(int32_t n) 179 | { 180 | std::vector values(n); 181 | std::iota(values.begin(), values.end(), 0); 182 | return ShapeTensor(1, std::move(values)); 183 | } 184 | 185 | ShapeTensor similar(IImporterContext* ctx, const ShapeTensor& exemplar, int64_t value) 186 | { 187 | return fillShapeVector(ctx, value, shapeOf(exemplar)); 188 | } 189 | 190 | ShapeTensor fillShapeVector(IImporterContext* ctx, int64_t value, const ShapeTensor& count) 191 | { 192 | assert(count.rank() == 1 && "implementation assumes 1D size"); 193 | assert(count.size() == 1 && "implementation assumes 1D size of known size"); 194 | if (count.allValuesKnown()) 195 | { 196 | return ShapeTensor(1, std::vector(count[0], value)); 197 | } 198 | else 199 | { 200 | nvinfer1::ISliceLayer* slice 201 | = addSlice(ctx, shapeVector(value).tensor(ctx), shapeVector(0), count, shapeVector(0)); 202 | return ShapeTensor(*slice->getOutput(0)); 203 | } 204 | } 205 | 206 | using nvinfer1::ElementWiseOperation; 207 | 208 | //! Helper that implements an elementwise operations on two shape tensors x and y. 209 | //! f must implement the operation on a pair of int64_t. 210 | //! commutes should be true f is commutative. 211 | //! rightIdentity should be the right identity value for f. 212 | static ShapeTensor op(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y, ElementWiseOperation operation, 213 | bool commutative, int64_t rightIdentity, const std::function&& f) 214 | { 215 | assert(!x.rankKnown() || !y.rankKnown() || x.rank() == y.rank()); 216 | if (x.sizeKnown() && y.sizeKnown()) 217 | { 218 | assert(x.size() == 1 || y.size() == 1 || x.size() == y.size()); 219 | if (y.isAll(rightIdentity) && y.size() <= x.size()) 220 | { 221 | return x; 222 | } 223 | if (commutative && x.isAll(rightIdentity) && x.size() <= y.size()) 224 | { 225 | return y; 226 | } 227 | } 228 | if (x.allValuesKnown() && y.allValuesKnown()) 229 | { 230 | std::vector values(std::max(x.size(), y.size())); 231 | for (size_t i = 0; i < values.size(); ++i) 232 | { 233 | // The % simulates broadcast rules. 
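            // e.g. x = {2, 3, 4}, y = {5}: y[i % 1] reuses 5 for every element,
            // mirroring how a length-1 operand broadcasts against a longer one.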
234 | values[i] = f(x[i % x.size()], y[i % y.size()]); 235 | } 236 | return ShapeTensor(x.rank(), std::move(values)); 237 | } 238 | return ShapeTensor(*ctx->network()->addElementWise(x.tensor(ctx), y.tensor(ctx), operation)->getOutput(0), 0); 239 | } 240 | 241 | ShapeTensor add(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 242 | { 243 | return op(ctx, x, y, ElementWiseOperation::kSUM, true, 0, std::plus()); 244 | } 245 | 246 | ShapeTensor sub(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 247 | { 248 | return op(ctx, x, y, ElementWiseOperation::kSUB, false, 0, std::minus()); 249 | } 250 | 251 | ShapeTensor mul(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 252 | { 253 | return op(ctx, x, y, ElementWiseOperation::kPROD, true, 1, std::multiplies()); 254 | } 255 | 256 | ShapeTensor min(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 257 | { 258 | return op(ctx, x, y, ElementWiseOperation::kMIN, true, std::numeric_limits::max(), 259 | [](int64_t x, int64_t y) { return std::min(x, y); }); 260 | } 261 | 262 | ShapeTensor max(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 263 | { 264 | return op(ctx, x, y, ElementWiseOperation::kMAX, true, std::numeric_limits::min(), 265 | [](int64_t x, int64_t y) { return std::max(x, y); }); 266 | } 267 | ShapeTensor floorDiv(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 268 | { 269 | return op(ctx, x, y, ElementWiseOperation::kFLOOR_DIV, false, 1, [](int64_t x, int64_t y) { 270 | assert(y != 0 && "divisor must be non-zero"); 271 | const int64_t d = x / y; 272 | return d * y == x ? d : d - ((x < 0) ^ (y < 0)); 273 | }); 274 | } 275 | 276 | ShapeTensor broadcast(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 277 | { 278 | // max(x,y) works unless x or y is 0. 279 | // min(x,y,1) yields 0 if x or y is 0, and 1 otherwise. 280 | // So compute max(x,y)*min(x,y,1). 
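    // Worked examples: broadcast(3, 1) = 3*1 = 3; broadcast(0, 4) = 4*0 = 0; broadcast(5, 5) = 5*1 = 5.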
281 | return mul(ctx, max(ctx, x, y), min(ctx, x, min(ctx, y, similar(ctx, y, 1)))); 282 | } 283 | 284 | ShapeTensor product(IImporterContext* ctx, const ShapeTensor& x, int first, int last, int rank) 285 | { 286 | assert(first <= last); 287 | ShapeTensor z(rank, std::vector(1, 1)); 288 | for (int i = first; i < last; ++i) 289 | { 290 | z = mul(ctx, z, gather(ctx, x, ShapeTensor(rank, std::vector(1, i)))); 291 | } 292 | return z; 293 | } 294 | 295 | ShapeTensor concat(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y) 296 | { 297 | assert(!x.rankKnown() || x.rank() == 1); 298 | assert(!y.rankKnown() || y.rank() == 1); 299 | if (x.sizeKnown() && x.size() == 0) 300 | { 301 | return y; 302 | } 303 | if (y.sizeKnown() && y.size() == 0) 304 | { 305 | return x; 306 | } 307 | if (x.allValuesKnown() && y.allValuesKnown()) 308 | { 309 | std::vector values(x.size() + y.size()); 310 | auto p = std::copy(x.begin(), x.end(), values.begin()); 311 | std::copy(y.begin(), y.end(), p); 312 | return ShapeTensor(1, std::move(values)); 313 | } 314 | 315 | nvinfer1::ITensor* const args[2] = {&x.tensor(ctx), &y.tensor(ctx)}; 316 | return ShapeTensor(*ctx->network()->addConcatenation(args, 2)->getOutput(0)); 317 | } 318 | 319 | ShapeTensor gather(IImporterContext* ctx, const ShapeTensor& data, const ShapeTensor& indices) 320 | { 321 | assert(data.rank() == 1); 322 | if (indices.allValuesKnown() 323 | && std::all_of(indices.begin(), indices.end(), [&data](int i) { return data.valueKnown(i); })) 324 | { 325 | std::vector z(indices.size()); 326 | std::transform(indices.begin(), indices.end(), z.begin(), [&data](int64_t i) { 327 | assert(0 <= i); 328 | assert(i < data.size()); 329 | return data[i]; 330 | }); 331 | return ShapeTensor(indices.rank(), std::move(z)); 332 | } 333 | return ShapeTensor(*ctx->network()->addGather(data.tensor(ctx), indices.tensor(ctx), 0)->getOutput(0)); 334 | } 335 | 336 | ShapeTensor shapeOf(nvinfer1::ITensor& tensor) 337 | { 338 | return ShapeTensor(tensor, 1); 339 | } 340 | 341 | ShapeTensor shapeOf(TensorOrWeights& t) 342 | { 343 | if (t.is_tensor()) 344 | { 345 | return shapeOf(t.tensor()); 346 | } 347 | const nvinfer1::Dims& d = t.weights().shape; 348 | return ShapeTensor(1, std::vector(d.d, d.d + d.nbDims)); 349 | } 350 | 351 | ShapeTensor shapeOf(const ShapeTensor& t) 352 | { 353 | assert(t.mDepth >= 0); 354 | if (t.mTensor) 355 | { 356 | return ShapeTensor(*t.mTensor, t.mDepth + 1); 357 | } 358 | assert(t.rankKnown()); 359 | assert(t.sizeKnown()); 360 | // ShapeTensor is either a scalar or vector. 361 | // shape of a scalar is an empty tensor. 362 | // shape of a vector is a one-element tensor containing the length of the vector. 363 | return t.rank() == 0 ? ShapeTensor(0, {}) : ShapeTensor(1, {t.size()}); 364 | } 365 | 366 | ShapeTensor convertTo1D(IImporterContext* ctx, const ShapeTensor& tensor) 367 | { 368 | assert(tensor.rank() == 0); 369 | assert(tensor.size() == 1); 370 | if (tensor.valueKnown(0)) 371 | { 372 | return shapeScalar(tensor[0]); 373 | } 374 | return ShapeTensor(*addShuffle(ctx, tensor.tensor(ctx), shapeVector(1))->getOutput(0)); 375 | } 376 | 377 | //! If all values of x are known, return Dims with those values, 378 | //! but throw exception if any value is outside specified bounds. 379 | //! Otherwise return Dims with zeros. 380 | //! 381 | //! The string that should describe the context of the dimensions, 382 | //! e.g. "reshape" or "fill output". 
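//! That context string is the `what` parameter, which is spliced into the error message.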
383 | static nvinfer1::Dims toDims(const ShapeTensor& x, const char* what, int32_t minAllowed, int32_t maxAllowed) 384 | { 385 | nvinfer1::Dims d{-1, {}}; 386 | if (x.sizeKnown()) 387 | { 388 | d.nbDims = x.size(); 389 | if (x.allValuesKnown()) 390 | { 391 | assert(x.size() <= nvinfer1::Dims::MAX_DIMS); 392 | for (const auto& dim : x) 393 | { 394 | if (dim < minAllowed || dim > maxAllowed) 395 | { 396 | std::ostringstream msg; 397 | msg << what << " dimensions have value " << dim << " beyond allowed bounds." << std::endl; 398 | throw std::runtime_error(msg.str()); 399 | } 400 | } 401 | std::copy(x.begin(), x.end(), d.d); 402 | } 403 | } 404 | return d; 405 | } 406 | 407 | //! If not all values in x are known, set layer input specifed by inputIndex 408 | //! to tensor with value of x. 409 | static void setShapeInputIfDynamic(IImporterContext* ctx, nvinfer1::ILayer* layer, int inputIndex, const ShapeTensor& x) 410 | { 411 | if (!x.allValuesKnown()) 412 | { 413 | layer->setInput(inputIndex, x.tensor(ctx)); 414 | } 415 | } 416 | 417 | bool operator==(const ShapeTensor& x, const ShapeTensor& y) 418 | { 419 | if (x.allValuesKnown() && y.allValuesKnown()) 420 | { 421 | return x.mValues == y.mValues; 422 | } 423 | assert(x.mTensor || y.mTensor); 424 | return x.mTensor == y.mTensor && x.mDepth == y.mDepth; 425 | } 426 | 427 | nvinfer1::ITensor& reshape(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& newShape) 428 | { 429 | const ShapeTensor oldShape = shapeOf(data); 430 | if (newShape == oldShape) 431 | { 432 | return data; 433 | } 434 | return *addShuffle(ctx, data, newShape)->getOutput(0); 435 | } 436 | 437 | nvinfer1::IShuffleLayer* addShuffle( 438 | IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& reshapeDims, bool zeroIsPlaceholder) 439 | { 440 | nvinfer1::IShuffleLayer* shuffle = ctx->network()->addShuffle(data); 441 | if (reshapeDims.allValuesKnown()) 442 | { 443 | shuffle->setReshapeDimensions(toDims(reshapeDims, "reshape", -1, std::numeric_limits::max())); 444 | } 445 | else 446 | { 447 | shuffle->setInput(1, reshapeDims.tensor(ctx)); 448 | } 449 | shuffle->setZeroIsPlaceholder(zeroIsPlaceholder); 450 | return shuffle; 451 | } 452 | 453 | nvinfer1::ISliceLayer* addSlice(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts, 454 | const ShapeTensor& sizes, const ShapeTensor& strides) 455 | { 456 | constexpr int32_t minDim = std::numeric_limits::min(); 457 | constexpr int32_t maxDim = std::numeric_limits::max(); 458 | nvinfer1::ISliceLayer* slice = ctx->network()->addSlice(data, toDims(starts, "slice start", 0, maxDim), 459 | toDims(sizes, "slice size", 0, maxDim), toDims(strides, "slide strides", minDim, maxDim)); 460 | setShapeInputIfDynamic(ctx, slice, 1, starts); 461 | setShapeInputIfDynamic(ctx, slice, 2, sizes); 462 | setShapeInputIfDynamic(ctx, slice, 3, strides); 463 | return slice; 464 | } 465 | 466 | nvinfer1::IFillLayer* addFill(IImporterContext* ctx, const ShapeTensor& shape, nvinfer1::FillOperation op) 467 | { 468 | nvinfer1::IFillLayer* fill 469 | = ctx->network()->addFill(toDims(shape, "fill output", 0, std::numeric_limits::max()), op); 470 | setShapeInputIfDynamic(ctx, fill, 0, shape); 471 | return fill; 472 | } 473 | 474 | std::ostream& operator<<(std::ostream& stream, const ShapeTensor& x) 475 | { 476 | stream << "("; 477 | for (int i = 0, e = x.size(); i < e; ++i) 478 | { 479 | stream << (i ? 
", " : ""); 480 | if (x.valueKnown(i)) 481 | { 482 | stream << x[i]; 483 | } 484 | else 485 | { 486 | stream << "_"; 487 | } 488 | } 489 | if (x.size() == 1 && x.rank() == 1) 490 | { 491 | // Use Python convention to distinguish 1-element vector from a scalar. 492 | stream << ","; 493 | } 494 | return stream << ")"; 495 | } 496 | 497 | } // namespace onnx2trt 498 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapeTensor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace onnx2trt 13 | { 14 | 15 | class IImporterContext; 16 | class TensorOrWeights; 17 | 18 | //! Represents a 0D or 1D tensor of int64_t. 19 | class ShapeTensor 20 | { 21 | public: 22 | //! Create undefined ShapeTensor. 23 | ShapeTensor() = default; 24 | 25 | //! Create ShapeTensor with known rank and values. 26 | ShapeTensor(int rank_, std::vector&& values_); 27 | 28 | //! Create ShapeTensor representing value of TensorOrWeights. 29 | ShapeTensor(IImporterContext* ctx, TensorOrWeights& t); 30 | 31 | //! Construct ShapeTensor equivalent to applying IShapeLayer depth times. 32 | //! The depth may be in [0,3]. 33 | explicit ShapeTensor(nvinfer1::ITensor& t, int depth = 0); 34 | 35 | //! True if rank is known. 36 | bool rankKnown() const 37 | { 38 | return mRank != kRANK_UNKNOWN; 39 | } 40 | 41 | //! Number of dimensions. Always 0 or 1. 42 | int32_t rank() const 43 | { 44 | assert(rankKnown()); 45 | return mRank; 46 | } 47 | 48 | //! True if number of elements in tensor is known. 49 | bool sizeKnown() const 50 | { 51 | return mSize != kSIZE_UNKNOWN; 52 | } 53 | 54 | //! Number of elements in the tensor. Asserts that sizeKnown()==true. 55 | int32_t size() const 56 | { 57 | assert(sizeKnown()); 58 | return mSize; 59 | } 60 | 61 | //! True if all element values are known. 62 | bool allValuesKnown() const 63 | { 64 | return mAllValuesKnown; 65 | } 66 | 67 | //! True if all element values equal the given value. 68 | bool isAll(int64_t value) const; 69 | 70 | using const_iterator = std::vector::const_iterator; 71 | 72 | //! Iterator pointing to beginning of sequence of element values. 73 | //! Requires that allValuesKnown() is true. 74 | const_iterator begin() const 75 | { 76 | assert(mAllValuesKnown); 77 | return mValues.begin(); 78 | } 79 | 80 | //! Iterator pointing to end of sequence of element values. 81 | //! Requires that allValuesKnown() is true. 82 | const_iterator end() const 83 | { 84 | assert(mAllValuesKnown); 85 | return mValues.end(); 86 | } 87 | 88 | //! True if operator[](k) is valid. 89 | bool valueKnown(int k) const; 90 | 91 | //! Return kth value. 92 | //! For a 0D tensor, k must be 0. 93 | //! Requires that valueKnown(k) is true. 94 | int64_t operator[](int k) const 95 | { 96 | assert(valueKnown(k)); 97 | return mValues[k]; 98 | } 99 | 100 | //! Return true if x and y always have the same value. 101 | friend bool operator==(const ShapeTensor& x, const ShapeTensor& y); 102 | friend ShapeTensor shapeOf(const ShapeTensor& t); 103 | 104 | //! Get TensorRT tensor representation. 105 | nvinfer1::ITensor& tensor(IImporterContext* ctx) const; 106 | 107 | const std::vector& values() const{return mValues;} 108 | void set_values(const std::vector& values){mValues = values;} 109 | 110 | private: 111 | //! 
Number of IShapeLayer to apply to mTensor to get ITensor representing value of *this. 112 | //! -1 for undefined *this, a value in [0,2] otherwise. 113 | //! 0: *this represents value of the tensor (always 0D or 1D) 114 | //! 1: *this represents shape of mTensor (always 1D) 115 | //! 2: *this represents rank of mTensor (always 1D tensor of length 1) 116 | mutable int8_t mDepth{-1}; 117 | 118 | //! True if all values are known. 119 | bool mAllValuesKnown{false}; 120 | 121 | static constexpr int kRANK_UNKNOWN = -1; 122 | static constexpr int kSIZE_UNKNOWN = -1; 123 | 124 | //! Rank of *this. 125 | //! Always -1, 0 or 1. 126 | int8_t mRank{kRANK_UNKNOWN}; 127 | 128 | //! Number of elements in the tensor, or -1 if unknown. 129 | int32_t mSize{kSIZE_UNKNOWN}; 130 | 131 | //! Must be non-null if mAllValuesKnown. 132 | mutable nvinfer1::ITensor* mTensor{nullptr}; 133 | 134 | //! Values of elements if some might be known. 135 | //! mValues.size() is always zero or equal to mSize. 136 | //! When mAllValuesKnown==true, all the values in mValues are correct 137 | //! and mValues.size() == mSize. 138 | //! When mAllValuesKnown==false, only the non-negative values in mValues 139 | //! are guaranteed to be correct, and only so if mValues.size() == mSize. 140 | std::vector mValues; 141 | }; 142 | 143 | //! Print ShapeTensor. Unknown values are printed as _. 144 | std::ostream& operator<<(std::ostream& stream, const ShapeTensor& x); 145 | 146 | //! Create 1D ShapeTensor of length n filled with value. 147 | //! count must be 1D ShapeTensor of size 1. 148 | ShapeTensor fillShapeVector(IImporterContext* ctx, int64_t value, const ShapeTensor& count); 149 | 150 | //! Create 1D ShapeTensor of length 1 containing given value. 151 | ShapeTensor shapeVector(int64_t value); 152 | 153 | //! Create 0D ShapeTensor containing the given value. 154 | ShapeTensor shapeScalar(int64_t value); 155 | 156 | //! Create 1D ShapeTensor containing [0,n). 157 | ShapeTensor iotaShapeVector(int32_t n); 158 | 159 | //! Create ShapeTensor filled with value that has same shape as exemplar. 160 | //! The exemplar must be 1D. 161 | ShapeTensor similar(IImporterContext* ctx, const ShapeTensor& exemplar, int64_t value); 162 | 163 | //! Elementwise addition 164 | ShapeTensor add(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 165 | 166 | //! Elementwise subtraction 167 | ShapeTensor sub(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 168 | 169 | //! Elementwise multiplication 170 | ShapeTensor mul(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 171 | 172 | //! Elementwise min 173 | ShapeTensor min(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 174 | 175 | //! Elementwise max 176 | ShapeTensor max(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 177 | 178 | //! Elementwise floor division 179 | ShapeTensor floorDiv(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 180 | 181 | //! Elementwise f, for a partial function f defined by: 182 | //! f(x,x) = x 183 | //! f(1,x) = x 184 | //! f(x,1) = x 185 | //! Undefined otherwise or if x < 0. 186 | ShapeTensor broadcast(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 187 | 188 | //! Return product of x[i] for i in [first..last), as 0D or one-element 1D tensor of given rank. 189 | ShapeTensor product(IImporterContext* ctx, const ShapeTensor& x, int first, int last, int rank); 190 | 191 | //! 
Gather where data is 1D tensor and indices can be 0D or 1D 192 | ShapeTensor gather(IImporterContext* ctx, const ShapeTensor& data, const ShapeTensor& indices); 193 | 194 | //! Concatenation of two 1D tensors 195 | ShapeTensor concat(IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y); 196 | 197 | //! Return gather(concat(x,y),subscripts) 198 | inline ShapeTensor interlace( 199 | IImporterContext* ctx, const ShapeTensor& x, const ShapeTensor& y, const ShapeTensor& subscripts) 200 | { 201 | return gather(ctx, concat(ctx, x, y), subscripts); 202 | } 203 | 204 | //! Return shape of a tensor. 205 | ShapeTensor shapeOf(nvinfer1::ITensor& tensor); 206 | ShapeTensor shapeOf(const ShapeTensor& tensor); 207 | ShapeTensor shapeOf(TensorOrWeights& t); 208 | 209 | //! Reshape 0D tensor to 1D tensor. 210 | ShapeTensor convertTo1D(IImporterContext* ctx, const ShapeTensor& tensor); 211 | 212 | //! Add an ISliceLayer. 213 | nvinfer1::ISliceLayer* addSlice(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts, 214 | const ShapeTensor& sizes, const ShapeTensor& strides); 215 | 216 | //! Add an IShuffleLayer. 217 | //! If the result does not need to have its parameters changed, and 218 | //! optimizing the no-op case away is okay, use function reshape instead. 219 | //! 220 | //! In general the default zeroIsPlaceholder=false should be used so 221 | //! that reshaping to empty tensors works correctly. Calling with 222 | //! zeroIsPlaceholder=true should happen only when replicating the 223 | //! semantics of the ONNX Reshape operator. 224 | nvinfer1::IShuffleLayer* addShuffle( 225 | IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& reshapeDims, bool zeroIsPlaceholder = false); 226 | 227 | //! Add an IFillLayer. 228 | nvinfer1::IFillLayer* addFill(IImporterContext* ctx, const ShapeTensor& shape, nvinfer1::FillOperation op); 229 | 230 | //! Reshape a tensor. 231 | //! 232 | //! Treats any zeros in newShape as dimensions, not placeholders. 233 | //! Implementation note: does not insert shuffle if it's a no-op. 234 | nvinfer1::ITensor& reshape(IImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& newShape); 235 | 236 | } // namespace onnx2trt 237 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapedWeights.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "ShapedWeights.hpp" 6 | #include "onnx2trt_utils.hpp" 7 | #include "trt_utils.hpp" 8 | #include 9 | #include 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | size_t ShapedWeights::count() const 15 | { 16 | if (this->values == nullptr && this->shape.nbDims <= 0) 17 | { 18 | return 0; 19 | } 20 | // TRT supports scalars, so 0D tensors should have a count of 1. 
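    // Added illustrative note (not in the original source): shape {} (nbDims == 0) with non-null
    // values yields a count of 1; shape {2, 3} yields 6; the null-values case with nbDims <= 0
    // already returned 0 above.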
21 | size_t c = 1; 22 | for (int i = 0; i < this->shape.nbDims; ++i) 23 | { 24 | c *= this->shape.d[i]; 25 | } 26 | return c; 27 | } 28 | 29 | ShapedWeights ShapedWeights::empty(DataType type) 30 | { 31 | return ShapedWeights(type, nullptr, nvinfer1::Dims{0}); 32 | } 33 | 34 | ShapedWeights::ShapedWeights() 35 | : values(nullptr) 36 | , shape{0} 37 | { 38 | } 39 | 40 | ShapedWeights::ShapedWeights(DataType type_, void* values_, nvinfer1::Dims shape_) 41 | : type(type_) 42 | , values(values_) 43 | , shape(shape_) 44 | { 45 | // Note: this->shape.type[] is not used 46 | } 47 | 48 | size_t ShapedWeights::size_bytes() const 49 | { 50 | return this->count() * getDtypeSize(this->type); 51 | } 52 | 53 | ShapedWeights::operator bool() const 54 | { 55 | return (bool) this->values; 56 | } 57 | 58 | ShapedWeights::operator nvinfer1::Weights() const 59 | { 60 | nvinfer1::Weights w{}; 61 | w.values = this->values; 62 | bool supported_type = convertDtype(this->type, &w.type); 63 | (void) supported_type; 64 | assert(supported_type); 65 | w.count = this->count(); 66 | return w; 67 | } 68 | 69 | const char* ShapedWeights::getName() const 70 | { 71 | return this->name; 72 | } 73 | 74 | void ShapedWeights::setName(const char* name) 75 | { 76 | this->name = name; 77 | } 78 | 79 | template 80 | void transpose4DWeights(ShapedWeights const& weights, nvinfer1::Permutation const perm, ShapedWeights* result) 81 | { 82 | nvinfer1::Dims original_shape = weights.shape; 83 | nvinfer1::Dims new_shape = result->shape; 84 | int nbDims = new_shape.nbDims; 85 | DType const* src = reinterpret_cast(weights.values); 86 | DType* dst = reinterpret_cast(result->values); 87 | 88 | nvinfer1::Dims expanded_original_shape{4, {1, 1, 1, 1}}; 89 | nvinfer1::Dims expanded_new_shape{4, {1, 1, 1, 1}}; 90 | nvinfer1::Permutation expanded_perm{0, 1, 2, 3}; 91 | 92 | int pad = 4 - nbDims; 93 | for (int i = 0; i < nbDims; ++i) 94 | { 95 | expanded_original_shape.d[pad + i] = original_shape.d[i]; 96 | expanded_new_shape.d[pad + i] = new_shape.d[i]; 97 | expanded_perm.order[pad + i] = perm.order[i] + pad; 98 | } 99 | 100 | 101 | int src_strides[4] = {1, 1, 1, 1}; 102 | int dst_strides[4] = {1, 1, 1, 1}; 103 | 104 | for (int i = 2; i >= 0; --i) 105 | { 106 | src_strides[i] = expanded_original_shape.d[i + 1] * src_strides[i + 1]; 107 | dst_strides[i] = expanded_new_shape.d[i + 1] * dst_strides[i + 1]; 108 | } 109 | 110 | for (int n = 0; n < expanded_original_shape.d[0]; ++n) 111 | { 112 | for (int c = 0; c < expanded_original_shape.d[1]; ++c) 113 | { 114 | for (int h = 0; h < expanded_original_shape.d[2]; ++h) 115 | { 116 | for (int w = 0; w < expanded_original_shape.d[3]; ++w) 117 | { 118 | int src_index = 0; 119 | int dst_index = 0; 120 | int src_coord[4] = {n, c, h, w}; 121 | int dst_coord[4]; 122 | for (int i = 0 ; i < 4; ++i) 123 | { 124 | dst_coord[i] = src_coord[expanded_perm.order[i]]; 125 | src_index += src_coord[i] * src_strides[i]; 126 | dst_index += dst_coord[i] * dst_strides[i]; 127 | } 128 | dst[dst_index] = src[src_index]; 129 | } 130 | } 131 | } 132 | } 133 | } 134 | 135 | bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& perm, ShapedWeights* result, IImporterContext* ctx) 136 | { 137 | nvinfer1::Dims shape = weights.shape; 138 | int nbDims = shape.nbDims; 139 | nvinfer1::Dims new_shape; 140 | new_shape.nbDims = nbDims; 141 | for (int d = 0; d < nbDims; ++d) 142 | { 143 | new_shape.d[d] = shape.d[perm.order[d]]; 144 | result->shape.d[d] = new_shape.d[d]; 145 | } 146 | 147 | if (shape.nbDims <= 4) 148 
| { 149 | if (weights.type == ::onnx::TensorProto::FLOAT) 150 | { 151 | transpose4DWeights(weights, perm, result); 152 | } 153 | else if (weights.type == ::onnx::TensorProto::FLOAT16) 154 | { 155 | transpose4DWeights(weights, perm, result); 156 | } 157 | else 158 | { 159 | return false; 160 | } 161 | } 162 | else 163 | { 164 | // TODO: Implement general transposes and multiple data types 165 | // Unsupported weights transpose 166 | return false; 167 | } 168 | nvinfer1::Dims permDims{nbDims, {}}; 169 | std::copy_n(perm.order, nbDims, permDims.d); 170 | LOG_WARNING("Weights " 171 | << weights.getName() << " has been transposed with permutation of " << permDims 172 | << "! If you plan on overwriting the weights with the Refitter API, the new weights must be pre-transposed."); 173 | result->setName(weights.getName()); 174 | return true; 175 | } 176 | 177 | } // namespace onnx2trt 178 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapedWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | class ShapedWeights 14 | { 15 | public: 16 | using DataType = int32_t; 17 | 18 | static ShapedWeights empty(DataType type); 19 | 20 | ShapedWeights(); 21 | 22 | explicit ShapedWeights(DataType type, void* values, nvinfer1::Dims shape_); 23 | 24 | size_t count() const; 25 | 26 | size_t size_bytes() const; 27 | 28 | const char* getName() const; 29 | 30 | void setName(const char* name); 31 | 32 | explicit operator bool() const; 33 | 34 | operator nvinfer1::Weights() const; 35 | 36 | template 37 | T& at(size_t index) 38 | { 39 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 40 | return static_cast(values)[index]; 41 | } 42 | 43 | template 44 | const T& at(size_t index) const 45 | { 46 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 47 | return static_cast(values)[index]; 48 | } 49 | 50 | public: 51 | DataType type; 52 | void* values; 53 | nvinfer1::Dims shape; 54 | const char* name{}; 55 | }; 56 | 57 | class IImporterContext; 58 | bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& perm, ShapedWeights* result, IImporterContext* ctx); 59 | 60 | } // namespace onnx2trt 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/Status.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvOnnxParser.h" 8 | 9 | #include 10 | #include 11 | 12 | // Used to strip out build path information from debug prints 13 | #if defined(SOURCE_LENGTH) 14 | #define __FILENAME__ (__FILE__ + SOURCE_LENGTH) 15 | #else 16 | #define __FILENAME__ (__FILE__) 17 | #endif 18 | 19 | #define MAKE_ERROR(desc, code) onnx2trt::Status((code), (desc), __FILENAME__, __LINE__, __func__) 20 | 21 | #define ASSERT(condition, error_code) \ 22 | do \ 23 | { \ 24 | if (!(condition)) \ 25 | { \ 26 | return MAKE_ERROR("Assertion failed: " #condition, (error_code)); \ 27 | } \ 28 | } while (0) 29 | 30 | #define MAKE_INPUT_ERROR(desc, code, name) Status((code), (desc), name, __LINE__, __func__) 31 | 32 | #define ASSERT_INPUT(condition, error_code, name) \ 33 | do \ 34 | { \ 35 | if (!(condition)) \ 36 | { \ 37 | return MAKE_INPUT_ERROR("Assertion failed: " #condition, (error_code), 
(name)); \ 38 | } \ 39 | } while (0) 40 | 41 | #define ASSERT_C(condition, error_code) \ 42 | do \ 43 | { \ 44 | if (!(condition)) \ 45 | { \ 46 | return error_code; \ 47 | } \ 48 | } while (0) 49 | 50 | #define GET_VALUE(value_or_error_, result_ptr) \ 51 | do \ 52 | { \ 53 | auto const& value_or_error = value_or_error_; \ 54 | if (value_or_error.is_error()) \ 55 | { \ 56 | return value_or_error.error(); \ 57 | } \ 58 | else \ 59 | { \ 60 | *result_ptr = value_or_error.value(); \ 61 | } \ 62 | } while (0) 63 | 64 | #define CHECK(call) \ 65 | do \ 66 | { \ 67 | Status status = call; \ 68 | if (!status.is_success()) \ 69 | { \ 70 | return status; \ 71 | } \ 72 | } while (0) 73 | 74 | namespace onnx2trt 75 | { 76 | 77 | using nvonnxparser::ErrorCode; 78 | 79 | class Status : public nvonnxparser::IParserError 80 | { 81 | ErrorCode _code; 82 | std::string _desc; 83 | std::string _file; 84 | int _line; 85 | std::string _func; 86 | int _node; 87 | 88 | public: 89 | static Status success() 90 | { 91 | return Status(ErrorCode::kSUCCESS); 92 | } 93 | Status() 94 | { 95 | } 96 | explicit Status(ErrorCode code, std::string desc = "", std::string file = "", int line = 0, std::string func = "", 97 | int node = -1) 98 | : _code(code) 99 | , _desc(desc) 100 | , _file(file) 101 | , _line(line) 102 | , _func(func) 103 | , _node(node) 104 | { 105 | } 106 | ErrorCode code() const override 107 | { 108 | return _code; 109 | } 110 | const char* desc() const override 111 | { 112 | return _desc.c_str(); 113 | } 114 | const char* file() const override 115 | { 116 | return _file.c_str(); 117 | } 118 | int line() const override 119 | { 120 | return _line; 121 | } 122 | const char* func() const override 123 | { 124 | return _func.c_str(); 125 | } 126 | int node() const override 127 | { 128 | return _node; 129 | } 130 | bool is_error() const 131 | { 132 | return _code != ErrorCode::kSUCCESS; 133 | } 134 | bool is_success() const 135 | { 136 | return _code == ErrorCode::kSUCCESS; 137 | } 138 | void setNode(int node) 139 | { 140 | _node = node; 141 | } 142 | }; 143 | 144 | template 145 | class ValueOrStatus 146 | { 147 | bool _is_error; 148 | T _value; 149 | Status _error; 150 | 151 | public: 152 | ValueOrStatus(T const& value) 153 | : _is_error(false) 154 | , _value(value) 155 | , _error(Status::success()) 156 | { 157 | } 158 | ValueOrStatus(T&& value) 159 | : _is_error(false) 160 | , _value(value) 161 | , _error(Status::success()) 162 | { 163 | } 164 | ValueOrStatus(Status const& error) 165 | : _is_error(true) 166 | , _error(error) 167 | { 168 | } 169 | ValueOrStatus(Status&& error) 170 | : _is_error(true) 171 | , _error(error) 172 | { 173 | } 174 | bool is_error() const 175 | { 176 | return _is_error; 177 | } 178 | T const& value() const 179 | { 180 | assert(!_is_error); 181 | return _value; 182 | } 183 | T& value() 184 | { 185 | assert(!_is_error); 186 | return _value; 187 | } 188 | Status const& error() const 189 | { 190 | assert(_is_error); 191 | return _error; 192 | } 193 | }; 194 | 195 | } // namespace onnx2trt 196 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/TensorOrWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "ShapedWeights.hpp" 8 | 9 | #include 10 | #include 11 | 12 | namespace onnx2trt 13 | { 14 | 15 | class TensorOrWeights 16 | { 17 | union 18 | { 19 | nvinfer1::ITensor* _tensor; 20 | ShapedWeights 
_weights; 21 | }; 22 | enum 23 | { 24 | NODE_TENSOR, 25 | NODE_WEIGHTS 26 | } _variant; 27 | 28 | public: 29 | TensorOrWeights() 30 | : _tensor(nullptr) 31 | , _variant(NODE_TENSOR) 32 | { 33 | } 34 | TensorOrWeights(nvinfer1::ITensor* tensor) 35 | : _tensor(tensor) 36 | , _variant(NODE_TENSOR) 37 | { 38 | } 39 | TensorOrWeights(ShapedWeights const& weights) 40 | : _weights(weights) 41 | , _variant(NODE_WEIGHTS) 42 | { 43 | } 44 | bool is_tensor() const 45 | { 46 | return _variant == NODE_TENSOR; 47 | } 48 | bool is_weights() const 49 | { 50 | return _variant == NODE_WEIGHTS; 51 | } 52 | bool isNullTensor() const 53 | { 54 | return is_tensor() && _tensor == nullptr; 55 | } 56 | nvinfer1::ITensor& tensor() 57 | { 58 | assert(!isNullTensor()); 59 | return *_tensor; 60 | } 61 | nvinfer1::ITensor const& tensor() const 62 | { 63 | assert(!isNullTensor()); 64 | return *_tensor; 65 | } 66 | ShapedWeights& weights() 67 | { 68 | assert(is_weights()); 69 | return _weights; 70 | } 71 | ShapedWeights const& weights() const 72 | { 73 | assert(is_weights()); 74 | return _weights; 75 | } 76 | nvinfer1::Dims shape() const 77 | { 78 | return is_tensor() ? _tensor->getDimensions() : _weights.shape; 79 | } 80 | explicit operator bool() const 81 | { 82 | return is_tensor() ? _tensor != nullptr : static_cast(_weights); 83 | } 84 | bool isInt32() const 85 | { 86 | return is_tensor() ? _tensor->getType() == nvinfer1::DataType::kINT32 : _weights.type == ::onnx::TensorProto_DataType_INT32; 87 | } 88 | bool isBool() const 89 | { 90 | return is_tensor() ? _tensor->getType() == nvinfer1::DataType::kBOOL : _weights.type == ::onnx::TensorProto_DataType_BOOL; 91 | } 92 | std::string getName() const 93 | { 94 | return is_tensor() ? _tensor->getName() : _weights.getName(); 95 | } 96 | std::string getType() const 97 | { 98 | if (is_tensor()) 99 | { 100 | switch(_tensor->getType()) 101 | { 102 | case nvinfer1::DataType::kFLOAT:return "FLOAT"; 103 | case nvinfer1::DataType::kHALF: return "HALF"; 104 | case nvinfer1::DataType::kINT8: return "INT8"; 105 | case nvinfer1::DataType::kINT32: return "INT32"; 106 | case nvinfer1::DataType::kBOOL: return "BOOL"; 107 | default: return "UNKNOWN TYPE"; 108 | } 109 | } 110 | else 111 | { 112 | switch(_weights.type) 113 | { 114 | case ::onnx::TensorProto::DOUBLE: return "DOUBLE -> FLOAT"; 115 | case ::onnx::TensorProto::FLOAT: return "FLOAT"; 116 | case ::onnx::TensorProto::INT8: return "INT8"; 117 | case ::onnx::TensorProto::FLOAT16: return "HALF"; 118 | case ::onnx::TensorProto::BOOL: return "BOOL"; 119 | case ::onnx::TensorProto::INT32: return "INT32"; 120 | case ::onnx::TensorProto::INT64: return "INT64 -> INT32"; 121 | default: return "UNKNOWN TYPE"; 122 | } 123 | } 124 | } 125 | }; 126 | 127 | } // namespace onnx2trt 128 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/builtin_op_importers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt.hpp" 8 | #include "utils.hpp" 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | string_map& getBuiltinOpImporterMap(); 14 | 15 | } // namespace onnx2trt 16 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 
"NvOnnxParser.h" 8 | #include "ShapedWeights.hpp" 9 | #include "Status.hpp" 10 | #include "TensorOrWeights.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace onnx2trt 21 | { 22 | 23 | class IImporterContext; 24 | 25 | // TODO: Find ABI-safe alternative approach for this: 26 | // Can't use std::vector 27 | // Can't use ::onnx::NodeProto 28 | // Can't use std::function 29 | typedef ValueOrStatus> NodeImportResult; 30 | typedef std::function& inputs)> 32 | NodeImporter; 33 | 34 | template 35 | using StringMap = std::unordered_map; 36 | 37 | class IImporterContext 38 | { 39 | public: 40 | virtual nvinfer1::INetworkDefinition* network() = 0; 41 | virtual StringMap& tensors() = 0; 42 | virtual StringMap& tensorLocations() = 0; 43 | virtual StringMap& tensorRangeMins() = 0; 44 | virtual StringMap& tensorRangeMaxes() = 0; 45 | virtual StringMap& layerPrecisions() = 0; 46 | virtual std::unordered_set& unsupportedShapeTensors() = 0; 47 | virtual StringMap& loopTensors() = 0; 48 | virtual void setOnnxFileLocation(std::string location) = 0; 49 | virtual std::string getOnnxFileLocation() = 0; 50 | virtual void registerTensor(TensorOrWeights tensor, const std::string& basename) = 0; 51 | virtual void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) = 0; 52 | virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) = 0; 53 | virtual int64_t getOpsetVersion(const char* domain = "") const = 0; 54 | virtual nvinfer1::ILogger& logger() = 0; 55 | virtual bool hasError() const = 0; 56 | virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; 57 | 58 | protected: 59 | virtual ~IImporterContext() 60 | { 61 | } 62 | }; 63 | 64 | } // namespace onnx2trt 65 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #if NV_TENSORRT_MAJOR < 4 11 | namespace nvinfer1 12 | { 13 | 14 | enum class PluginFormat : uint8_t 15 | { 16 | kNCHW = 0, //!< NCHW 17 | kNC2HW2 = 1, //!< NCHW with 2-element packed channels 18 | kNHWC8 = 2 //!< NHWC with 8-element packed channels (C 19 | //! 
must be a multiple of 8) 20 | }; 21 | // from NvInfer.h 22 | class IPluginExt : public IPlugin 23 | { 24 | public: 25 | virtual int getTensorRTVersion() const noexcept 26 | { 27 | return NV_TENSORRT_VERSION; 28 | } 29 | virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0; 30 | virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, 31 | DataType type, PluginFormat format, int maxBatchSize) noexcept 32 | = 0; 33 | 34 | protected: 35 | void configure( 36 | const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept final 37 | { 38 | try 39 | { 40 | DataType type = nvinfer1::DataType::kFLOAT; 41 | PluginFormat format = nvinfer1::PluginFormat::kLINEAR; 42 | return this->configureWithFormat(inputDims, nbInputs, outputDims, nbOutputs, type, format, maxBatchSize); 43 | } 44 | catch (const std::exception& e) 45 | { 46 | nvinfer1::getLogger()->log(nvinfer1::ILogger::Severity::kERROR, e.what().c_str()); 47 | } 48 | } 49 | virtual ~IPluginExt() 50 | { 51 | } 52 | }; 53 | 54 | } // namespace nvinfer1 55 | #endif 56 | 57 | namespace onnx2trt 58 | { 59 | 60 | struct IOwnable 61 | { 62 | virtual void destroy() = 0; 63 | 64 | protected: 65 | virtual ~IOwnable() 66 | { 67 | } 68 | }; 69 | 70 | struct OwnableDeleter 71 | { 72 | void operator()(IOwnable* obj) const 73 | { 74 | obj->destroy(); 75 | } 76 | }; 77 | 78 | using UniqueOwnable = std::unique_ptr; 79 | class Plugin; 80 | class PluginV2; 81 | 82 | } // namespace onnx2trt 83 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_runtime.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt_common.hpp" 8 | 9 | namespace onnx2trt 10 | { 11 | 12 | typedef Plugin* (*plugin_deserializer)(const void* serialData, size_t serialLength); 13 | 14 | } // namespace onnx2trt 15 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "onnxErrorRecorder.hpp" 6 | #include 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | 12 | ONNXParserErrorRecorder* ONNXParserErrorRecorder::create( 13 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 14 | { 15 | try 16 | { 17 | auto recorder = new ONNXParserErrorRecorder(logger, otherRecorder); 18 | if (recorder) 19 | { 20 | recorder->incRefCount(); 21 | } 22 | return recorder; 23 | } 24 | catch (const std::exception& e) 25 | { 26 | logError(logger, e.what()); 27 | return nullptr; 28 | } 29 | } 30 | 31 | void ONNXParserErrorRecorder::destroy(ONNXParserErrorRecorder*& recorder) 32 | { 33 | if (recorder) 34 | { 35 | recorder->decRefCount(); 36 | recorder = nullptr; 37 | } 38 | } 39 | 40 | void ONNXParserErrorRecorder::logError(nvinfer1::ILogger* logger, const char* str) 41 | { 42 | if (logger) 43 | { 44 | logger->log(ILogger::Severity::kERROR, str); 45 | } 46 | } 47 | 48 | ONNXParserErrorRecorder::ONNXParserErrorRecorder( 49 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 50 | : mUserRecorder(otherRecorder) 51 | , mLogger(logger) 52 | { 53 | if (mUserRecorder) 54 | { 55 | mUserRecorder->incRefCount(); 56 | } 57 | } 58 | 59 | 
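// Added lifecycle note (not in the original source): create() above returns a recorder whose
// reference count is already 1; callers release it via destroy(), and decRefCount() below
// deletes the object once the count reaches zero.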
ONNXParserErrorRecorder::~ONNXParserErrorRecorder() noexcept 60 | { 61 | if (mUserRecorder) 62 | { 63 | mUserRecorder->decRefCount(); 64 | } 65 | } 66 | 67 | void ONNXParserErrorRecorder::clear() noexcept 68 | { 69 | try 70 | { 71 | // grab a lock so that there is no addition while clearing. 72 | std::lock_guard guard(mStackLock); 73 | mErrorStack.clear(); 74 | } 75 | catch (const std::exception& e) 76 | { 77 | logError(mLogger, e.what()); 78 | } 79 | }; 80 | 81 | bool ONNXParserErrorRecorder::reportError( 82 | nvinfer1::ErrorCode val, nvinfer1::IErrorRecorder::ErrorDesc desc) noexcept 83 | { 84 | try 85 | { 86 | std::lock_guard guard(mStackLock); 87 | mErrorStack.push_back(errorPair(val, desc)); 88 | if (mUserRecorder) 89 | { 90 | mUserRecorder->reportError(val, desc); 91 | } 92 | else 93 | { 94 | logError(mLogger, desc); 95 | } 96 | } 97 | catch (const std::exception& e) 98 | { 99 | logError(mLogger, e.what()); 100 | } 101 | // All errors are considered fatal. 102 | return true; 103 | } 104 | 105 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::incRefCount() noexcept 106 | { 107 | // Atomically increment or decrement the ref counter. 108 | return ++mRefCount; 109 | } 110 | 111 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::decRefCount() noexcept 112 | { 113 | auto newVal = --mRefCount; 114 | if (newVal == 0) 115 | { 116 | delete this; 117 | } 118 | return newVal; 119 | } 120 | 121 | } // namespace onnx2trt 122 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvInferRuntimeCommon.h" 8 | #include "onnx2trt_utils.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace onnx2trt 16 | { 17 | 18 | //! 19 | //! A simple implementation of the IErrorRecorder interface for 20 | //! use by ONNX importer. 21 | //! ONNX-importer Error recorder is based on a vector that pairs the error 22 | //! code and the error string into a single element. It also uses 23 | //! standard mutex and atomics in order to make sure that the code 24 | //! works in a multi-threaded environment. 25 | //! 26 | class ONNXParserErrorRecorder : public nvinfer1::IErrorRecorder 27 | { 28 | using RefCount = nvinfer1::IErrorRecorder::RefCount; 29 | using ErrorDesc = nvinfer1::IErrorRecorder::ErrorDesc; 30 | using ErrorCode = nvinfer1::ErrorCode; 31 | using IErrorRecorder = nvinfer1::IErrorRecorder; 32 | using ILogger = nvinfer1::ILogger; 33 | 34 | using errorPair = std::pair; 35 | using errorStack = std::vector; 36 | 37 | public: 38 | static ONNXParserErrorRecorder* create( 39 | ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 40 | 41 | static void destroy(ONNXParserErrorRecorder*& recorder); 42 | 43 | void clear() noexcept final; 44 | RefCount incRefCount() noexcept final; 45 | RefCount decRefCount() noexcept final; 46 | bool reportError(ErrorCode val, ErrorDesc desc) noexcept final; 47 | 48 | int32_t getNbErrors() const noexcept final 49 | { 50 | return mErrorStack.size(); 51 | } 52 | 53 | ErrorCode getErrorCode(int32_t errorIdx) const noexcept final 54 | { 55 | return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; 56 | } 57 | 58 | ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final 59 | { 60 | return invalidIndexCheck(errorIdx) ? 
"errorIdx out of range." : (*this)[errorIdx].second.c_str(); 61 | } 62 | 63 | bool hasOverflowed() const noexcept final 64 | { 65 | // This class can never overflow since we have dynamic resize via std::vector usage. 66 | return false; 67 | } 68 | 69 | protected: 70 | ONNXParserErrorRecorder(ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 71 | 72 | virtual ~ONNXParserErrorRecorder() noexcept; 73 | 74 | static void logError(ILogger* logger, const char* str); 75 | 76 | // Simple helper functions. 77 | const errorPair& operator[](size_t index) const noexcept 78 | { 79 | return mErrorStack[index]; 80 | } 81 | 82 | bool invalidIndexCheck(int32_t index) const noexcept 83 | { 84 | // By converting signed to unsigned, we only need a single check since 85 | // negative numbers turn into large positive greater than the size. 86 | size_t sIndex = index; 87 | return sIndex >= mErrorStack.size(); 88 | } 89 | // Mutex to hold when locking mErrorStack. 90 | std::mutex mStackLock; 91 | 92 | // Reference count of the class. Destruction of the class when mRefCount 93 | // is not zero causes undefined behavior. 94 | std::atomic mRefCount{0}; 95 | 96 | // The error stack that holds the errors recorded by TensorRT. 97 | errorStack mErrorStack; 98 | 99 | // Original error recorder (set by user) 100 | IErrorRecorder* mUserRecorder{nullptr}; 101 | 102 | // logger 103 | ILogger* mLogger{nullptr}; 104 | }; // class ONNXParserErrorRecorder 105 | 106 | } // namespace onnx2trt 107 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #pragma once 14 | 15 | namespace 16 | { 17 | 18 | template 19 | bool convertOnnxDims(OnnxDims const& onnxDims, nvinfer1::Dims& trtDims) 20 | { 21 | std::vector onnxDims_vector; 22 | for (const auto& onnxDim : onnxDims) 23 | { 24 | const int dim = onnxDim.dim_param() == "" ? (onnxDim.dim_value() >= 0 ? onnxDim.dim_value() : -1) : -1; 25 | onnxDims_vector.emplace_back(dim); 26 | } 27 | trtDims.nbDims = onnxDims_vector.size(); 28 | assert(trtDims.nbDims <= nvinfer1::Dims::MAX_DIMS); 29 | std::copy(onnxDims_vector.begin(), onnxDims_vector.end(), trtDims.d); 30 | return true; 31 | } 32 | 33 | // Removes raw data from the text representation of an ONNX model 34 | void remove_raw_data_strings(std::string& s) 35 | { 36 | std::string::size_type beg = 0; 37 | const std::string key = "raw_data: \""; 38 | const std::string sub = "..."; 39 | while ((beg = s.find(key, beg)) != std::string::npos) 40 | { 41 | beg += key.length(); 42 | std::string::size_type end = beg - 1; 43 | // Note: Must skip over escaped end-quotes 44 | while (s[(end = s.find("\"", ++end)) - 1] == '\\') 45 | { 46 | } 47 | if (end - beg > 128) 48 | { // Only remove large data strings 49 | s.replace(beg, end - beg, "..."); 50 | } 51 | beg += sub.length(); 52 | } 53 | } 54 | 55 | // Removes float_data, int32_data etc. 
from the text representation of an ONNX model 56 | std::string remove_repeated_data_strings(std::string& s) 57 | { 58 | std::istringstream iss(s); 59 | std::ostringstream oss; 60 | bool is_repeat = false; 61 | for (std::string line; std::getline(iss, line);) 62 | { 63 | if (line.find("float_data:") != std::string::npos || line.find("int32_data:") != std::string::npos 64 | || line.find("int64_data:") != std::string::npos) 65 | { 66 | if (!is_repeat) 67 | { 68 | is_repeat = true; 69 | oss << line.substr(0, line.find(":") + 1) << " ...\n"; 70 | } 71 | } 72 | else 73 | { 74 | is_repeat = false; 75 | oss << line << "\n"; 76 | } 77 | } 78 | return oss.str(); 79 | } 80 | 81 | } // anonymous namespace 82 | 83 | inline std::string pretty_print_onnx_to_string(::google::protobuf::Message const& message) 84 | { 85 | std::string s; 86 | ::google::protobuf::TextFormat::PrintToString(message, &s); 87 | remove_raw_data_strings(s); 88 | s = remove_repeated_data_strings(s); 89 | return s; 90 | } 91 | 92 | inline std::ostream& operator<<(std::ostream& stream, ::onnx::ModelProto const& message) 93 | { 94 | stream << pretty_print_onnx_to_string(message); 95 | return stream; 96 | } 97 | 98 | inline std::ostream& operator<<(std::ostream& stream, ::onnx::NodeProto const& message) 99 | { 100 | stream << pretty_print_onnx_to_string(message); 101 | return stream; 102 | } 103 | 104 | //... 105 | //...Consider moving all of the below functions into a stand alone 106 | //... 107 | 108 | inline bool ParseFromFile_WAR(google::protobuf::Message* msg, const char* filename) 109 | { 110 | 111 | std::ifstream stream(filename, std::ios::in | std::ios::binary); 112 | if (!stream) 113 | { 114 | std::cerr << "Could not open file " << std::string(filename) << std::endl; 115 | return false; 116 | } 117 | google::protobuf::io::IstreamInputStream rawInput(&stream); 118 | 119 | google::protobuf::io::CodedInputStream coded_input(&rawInput); 120 | // Note: This WARs the very low default size limit (64MB) 121 | coded_input.SetTotalBytesLimit(std::numeric_limits::max()); 122 | return msg->ParseFromCodedStream(&coded_input); 123 | } 124 | 125 | inline bool ParseFromTextFile(google::protobuf::Message* msg, const char* filename) 126 | { 127 | std::ifstream stream(filename, std::ios::in); 128 | if (!stream) 129 | { 130 | std::cerr << "Could not open file " << std::string(filename) << std::endl; 131 | return false; 132 | } 133 | 134 | google::protobuf::io::IstreamInputStream rawInput(&stream); 135 | 136 | return google::protobuf::TextFormat::Parse(&rawInput, msg); 137 | } 138 | 139 | inline std::string onnx_ir_version_string(int64_t ir_version = ::onnx::IR_VERSION) 140 | { 141 | int onnx_ir_major = ir_version / 1000000; 142 | int onnx_ir_minor = ir_version % 1000000 / 10000; 143 | int onnx_ir_patch = ir_version % 10000; 144 | return (std::to_string(onnx_ir_major) + "." + std::to_string(onnx_ir_minor) + "." 
+ std::to_string(onnx_ir_patch));
145 | }
146 | 
--------------------------------------------------------------------------------
/src/tensorRT/onnx_parser/readme.md:
--------------------------------------------------------------------------------
1 | # ONNX Parser
2 | - These files are extracted from the official onnx-tensorrt project; the Python-related parts are removed, everything else is kept
3 | - Support for Plugin nodes has also been added
4 | - https://github.com/onnx/onnx-tensorrt
--------------------------------------------------------------------------------
/src/tensorRT/onnx_parser/toposort.hpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  */
4 | 
5 | #pragma once
6 | 
7 | #include <unordered_map>
8 | #include <vector>
9 | 
10 | #include <iostream>
11 | using std::cout;
12 | using std::cerr;
13 | using std::endl;
14 | 
15 | namespace
16 | {
17 | 
18 | enum NodeState
19 | {
20 |     NODE_UNVISITED,
21 |     NODE_ACTIVE,
22 |     NODE_VISITED
23 | };
24 | 
25 | template <class Container>
26 | bool get_post_order(size_t node_idx, Container const& nodes, std::unordered_map<std::string, size_t> const& node_map,
27 |     std::vector<NodeState>* node_states, std::vector<size_t>* order)
28 | {
29 |     NodeState& node_state = node_states->at(node_idx);
30 |     if (node_state == NODE_ACTIVE)
31 |     {
32 |         // Cycle detected!
33 |         cerr << "ERROR: Graph contains a cycle" << endl;
34 |         return false;
35 |     }
36 |     else if (node_state == NODE_VISITED)
37 |     {
38 |         return true;
39 |     }
40 |     else
41 |     {
42 |         node_state = NODE_ACTIVE;
43 |         // TODO: This .Get().input() is highly specific to protobuf, should
44 |         // generalise it somehow.
45 |         for (auto const& input : nodes.Get(node_idx).input())
46 |         {
47 |             if (!node_map.count(input))
48 |             {
49 |                 // Input node not found in graph!
50 |                 // cerr << "ERROR: Input node not found in graph: "
51 |                 //      << input << endl;
52 |                 // return false;
53 |                 continue; // Skip missing input edges
54 |             }
55 |             size_t input_node_idx = node_map.at(input);
56 |             if (!get_post_order(input_node_idx, nodes, node_map, node_states, order))
57 |             {
58 |                 return false;
59 |             }
60 |         }
61 |         node_state = NODE_VISITED;
62 |         order->push_back(node_idx);
63 |     }
64 |     return true;
65 | }
66 | 
67 | } // anonymous namespace
68 | 
69 | template <class Container>
70 | bool toposort(Container const& nodes, std::vector<size_t>* order)
71 | {
72 |     std::unordered_map<std::string, size_t> node_map;
73 |     for (size_t i = 0; i < (size_t) nodes.size(); ++i)
74 |     {
75 |         // TODO: This .Get().input() is highly specific to protobuf, should
76 |         // generalise it somehow.
77 |         for (auto const& output : nodes.Get(i).output())
78 |         {
79 |             if (!node_map.emplace(output, i).second)
80 |             {
81 |                 // Output name appears more than once in graph!
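                // (Added illustrative note: two nodes that both list, say, a tensor named conv_out
                //  as an output would make the output-to-producer map ambiguous, so the sort is rejected.)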
82 | cerr << "ERROR: Output name is not unique: " << output << endl; 83 | return false; 84 | } 85 | } 86 | } 87 | order->reserve(nodes.size()); 88 | std::vector node_states(nodes.size(), NODE_UNVISITED); 89 | for (size_t i = 0; i < (size_t) nodes.size(); ++i) 90 | { 91 | if (!get_post_order(i, nodes, node_map, &node_states, order)) 92 | { 93 | return false; 94 | } 95 | } 96 | return true; 97 | } 98 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/trt_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "Status.hpp" 8 | #include "TensorOrWeights.hpp" 9 | #include "onnx2trt.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace onnx2trt 17 | { 18 | 19 | inline int getDtypeSize(nvinfer1::DataType trtDtype) 20 | { 21 | switch (trtDtype) 22 | { 23 | case nvinfer1::DataType::kFLOAT: return 4; 24 | case nvinfer1::DataType::kINT8: return 1; 25 | case nvinfer1::DataType::kHALF: return 2; 26 | case nvinfer1::DataType::kINT32: 27 | return 4; 28 | // TRT does not support booleans as a native type, so we treat them like int32 values. 29 | case nvinfer1::DataType::kBOOL: 30 | return 4; 31 | // TODO: Some sort of error handling 32 | default: return -1; 33 | } 34 | } 35 | 36 | inline nvinfer1::Dims insert_dim(nvinfer1::Dims const& dims, int idx, int value) 37 | { 38 | assert(idx < dims.nbDims + 1); 39 | nvinfer1::Dims new_dims; 40 | new_dims.nbDims = dims.nbDims + 1; 41 | for (int i = 0; i < idx; ++i) 42 | { 43 | new_dims.d[i] = dims.d[i]; 44 | } 45 | new_dims.d[idx] = value; 46 | for (int i = idx + 1; i < new_dims.nbDims; ++i) 47 | { 48 | new_dims.d[i] = dims.d[i - 1]; 49 | } 50 | return new_dims; 51 | } 52 | 53 | inline nvinfer1::Dims remove_dim(nvinfer1::Dims const& dims, int idx) 54 | { 55 | assert(idx < dims.nbDims); 56 | nvinfer1::Dims new_dims; 57 | new_dims.nbDims = dims.nbDims - 1; 58 | for (int i = 0; i < idx; ++i) 59 | { 60 | new_dims.d[i] = dims.d[i]; 61 | } 62 | for (int i = idx; i < new_dims.nbDims; ++i) 63 | { 64 | new_dims.d[i] = dims.d[i + 1]; 65 | } 66 | // Special case for scalar result (i.e., there was only one dim originally) 67 | if (new_dims.nbDims == 0) 68 | { 69 | new_dims.nbDims = 1; 70 | new_dims.d[0] = 1; 71 | } 72 | return new_dims; 73 | } 74 | 75 | // Adds unitary dimensions on the left 76 | inline nvinfer1::Dims expand_dims(nvinfer1::Dims const& dims, int ndim_new) 77 | { 78 | assert(dims.nbDims <= ndim_new); 79 | nvinfer1::Dims new_dims; 80 | new_dims.nbDims = ndim_new; 81 | int j = 0; 82 | for (; j < ndim_new - dims.nbDims; ++j) 83 | { 84 | new_dims.d[j] = 1; 85 | } 86 | for (int i = 0; i < dims.nbDims; ++i, ++j) 87 | { 88 | new_dims.d[j] = dims.d[i]; 89 | } 90 | return new_dims; 91 | } 92 | 93 | inline nvinfer1::Permutation remove_first_dim(nvinfer1::Permutation const& perm) 94 | { 95 | assert(perm.order[0] == 0); 96 | nvinfer1::Permutation new_perm; 97 | int ndim = nvinfer1::Dims::MAX_DIMS; 98 | for (int i = 0; i < ndim - 1; ++i) 99 | { 100 | new_perm.order[i] = perm.order[i + 1] - 1; 101 | } 102 | return new_perm; 103 | } 104 | 105 | inline nvinfer1::Dims squeeze_trailing_dims(nvinfer1::Dims const& dims) 106 | { 107 | nvinfer1::Dims new_dims = dims; 108 | // Note: TRT requires at least one dimension, so we don't squeeze [1]->[] 109 | while (new_dims.nbDims > 1 && new_dims.d[new_dims.nbDims - 1] == 1) 110 | { 111 | --new_dims.nbDims; 112 | } 113 | 
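    // Added illustrative note (not in the original source): {2, 3, 1, 1} squeezes to {2, 3};
    // {1, 1} squeezes to {1}, since at least one dimension is always kept.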
return new_dims; 114 | } 115 | 116 | inline nvinfer1::Dims squeeze_leading_dims(const nvinfer1::Dims& dims) 117 | { 118 | nvinfer1::Dims newDims; 119 | // Copy dims only if a non-1 has been seen already. 120 | bool non1Seen{false}; 121 | newDims.nbDims = std::copy_if(dims.d, dims.d + dims.nbDims, newDims.d, 122 | [&non1Seen](int x) { 123 | non1Seen = (x != 1) ? true : non1Seen; 124 | return non1Seen; 125 | }) 126 | - newDims.d; 127 | return newDims; 128 | } 129 | 130 | inline nvinfer1::DimsHW operator-(nvinfer1::DimsHW dims) 131 | { 132 | return nvinfer1::DimsHW(-dims.h(), -dims.w()); 133 | } 134 | 135 | // Note: These are used for checking beg_padding == end_padding 136 | inline bool operator==(nvinfer1::Dims const& a, nvinfer1::Dims const& b) 137 | { 138 | if (a.nbDims != b.nbDims) 139 | { 140 | return false; 141 | } 142 | for (int i = 0; i < a.nbDims; ++i) 143 | { 144 | if (a.d[i] != b.d[i]) 145 | { 146 | return false; 147 | } 148 | } 149 | return true; 150 | } 151 | inline bool operator!=(nvinfer1::Dims const& a, nvinfer1::Dims const& b) 152 | { 153 | return !(a == b); 154 | } 155 | 156 | inline TensorOrWeights identity(IImporterContext* ctx, TensorOrWeights input) 157 | { 158 | if (input.is_weights()) 159 | { 160 | return input; 161 | } 162 | else 163 | { 164 | auto* layer = ctx->network()->addIdentity(input.tensor()); 165 | if (!layer) 166 | { 167 | return nullptr; 168 | } 169 | return layer->getOutput(0); 170 | } 171 | } 172 | 173 | inline ::onnx::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataType dt) 174 | { 175 | switch (dt) 176 | { 177 | case nvinfer1::DataType::kFLOAT: return ::onnx::TensorProto::FLOAT; 178 | case nvinfer1::DataType::kHALF: return ::onnx::TensorProto::FLOAT16; 179 | case nvinfer1::DataType::kINT32: return ::onnx::TensorProto::INT32; 180 | case nvinfer1::DataType::kINT8: return ::onnx::TensorProto::INT8; 181 | case nvinfer1::DataType::kBOOL: return ::onnx::TensorProto::BOOL; 182 | default: return ::onnx::TensorProto_DataType_UNDEFINED; 183 | } 184 | throw std::runtime_error{"Unreachable"}; 185 | } 186 | 187 | } // namespace onnx2trt 188 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | template 10 | using string_map = std::unordered_map; 11 | -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/onnxplugin.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "onnxplugin.hpp" 3 | #include 4 | 5 | using namespace nvinfer1; 6 | using namespace std; 7 | 8 | namespace ONNXPlugin { 9 | 10 | GTensor::GTensor(float* ptr, int ndims, int* dims) { 11 | this->ptr_ = ptr; 12 | this->shape_.insert(shape_.end(), dims, dims + ndims); 13 | this->dtype_ = TRT::DataType::Float; 14 | } 15 | 16 | int GTensor::offset_array(size_t size, const int* index_array) const{ 17 | 18 | Assert(size <= shape_.size()); 19 | int value = 0; 20 | for(int i = 0; i < shape_.size(); ++i){ 21 | 22 | if(i < size) 23 | value += index_array[i]; 24 | 25 | if(i + 1 < shape_.size()) 26 | value *= shape_[i+1]; 27 | } 28 | return value; 29 | } 30 | 31 | int GTensor::offset_array(const std::vector& index) const{ 32 | return offset_array(index.size(), index.data()); 33 | } 34 | 35 | GTensor::GTensor(TRT::float16* ptr, int ndims, int* dims) { 36 | this->ptr_ = 
ptr; 37 | this->shape_.insert(shape_.end(), dims, dims + ndims); 38 | this->dtype_ = TRT::DataType::Float16; 39 | } 40 | 41 | GTensor::GTensor(const TRT::Tensor& tensor) { 42 | this->ptr_ = (float*)tensor.gpu(); 43 | this->shape_ = tensor.dims(); 44 | this->dtype_ = TRT::DataType::Float; 45 | } 46 | 47 | int GTensor::count(int start_axis) const { 48 | if(start_axis >= 0 && start_axis < shape_.size()){ 49 | int size = 1; 50 | for (int i = start_axis; i < shape_.size(); ++i) 51 | size *= shape_[i]; 52 | return size; 53 | }else{ 54 | return 0; 55 | } 56 | } 57 | 58 | /////////////////////////////////// 59 | LayerConfig::LayerConfig() { 60 | support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 61 | support_plugin_format_set_ = {nvinfer1::PluginFormat::kLINEAR}; 62 | usage_dtype_ = TRT::DataType::Float; 63 | usage_plugin_format_ = nvinfer1::PluginFormat::kLINEAR; 64 | } 65 | 66 | void LayerConfig::serialize_data_copy_to(void* buffer) { 67 | if (!serialize_data_.empty()) 68 | memcpy(buffer, &serialize_data_[0], serialize_data_.size()); 69 | } 70 | 71 | int LayerConfig::serialize() { 72 | 73 | Plugin::BinIO out; 74 | out << workspace_size_; 75 | out << usage_dtype_; 76 | out << max_batch_size_; 77 | out << usage_plugin_format_; 78 | out << info_; 79 | 80 | out << (int)weights_.size(); 81 | for (int i = 0; i < weights_.size(); ++i) { 82 | 83 | if (usage_dtype_ == TRT::DataType::Float) { 84 | weights_[i]->to_float(); 85 | } 86 | else if (usage_dtype_ == TRT::DataType::Float16) { 87 | weights_[i]->to_half(); 88 | } 89 | else{ 90 | INFOE("unsupport datatype: %d", (int)usage_dtype_); 91 | } 92 | 93 | out << weights_[i]->dims(); 94 | out << weights_[i]->type(); 95 | out.write((char*)weights_[i]->cpu(), weights_[i]->bytes()); 96 | } 97 | 98 | seril(out); 99 | serialize_data_ = out.writedMemory(); 100 | return serialize_data_.size(); 101 | } 102 | 103 | void LayerConfig::deserialize(const void* ptr, size_t length) { 104 | 105 | Plugin::BinIO in(ptr, length); 106 | in >> workspace_size_; 107 | in >> usage_dtype_; 108 | in >> max_batch_size_; 109 | in >> usage_plugin_format_; 110 | in >> info_; 111 | 112 | int nbWeights = 0; 113 | in >> nbWeights; 114 | 115 | weights_.resize(nbWeights); 116 | for (int i = 0; i < nbWeights; ++i) { 117 | std::vector dims; 118 | in >> dims; 119 | 120 | TRT::DataType dt; 121 | in >> dt; 122 | 123 | weights_[i].reset(new TRT::Tensor(dims, dt)); 124 | in.read(weights_[i]->cpu(), weights_[i]->bytes()); 125 | weights_[i]->gpu(); 126 | } 127 | deseril(in); 128 | } 129 | 130 | void LayerConfig::setup(const std::string& info, const std::vector>& weights) { 131 | 132 | this->info_ = info; 133 | this->weights_ = weights; 134 | } 135 | 136 | /////////////////////////////////////////////////////////////////////////////////// 137 | 138 | static TRT::DataType convert_trt_datatype(nvinfer1::DataType dt){ 139 | switch(dt){ 140 | case nvinfer1::DataType::kFLOAT: return TRT::DataType::Float; 141 | case nvinfer1::DataType::kHALF: return TRT::DataType::Float16; 142 | default: 143 | INFOE("Unsupport data type %d", dt); 144 | return TRT::DataType::Float; 145 | } 146 | } 147 | 148 | TRTPlugin::~TRTPlugin() { 149 | } 150 | 151 | void TRTPlugin::pluginInit(const std::string& name, const std::string& info, const std::vector>& weights) { 152 | phase_ = CompilePhase; 153 | layerName_ = name; 154 | config_ = this->new_config(); 155 | Assert(config_ != nullptr); 156 | config_->setup(info, weights); 157 | config_->init(); 158 | } 159 | 160 | void TRTPlugin::pluginInit(const std::string& name, const void* 
serialData, size_t serialLength) { 161 | phase_ = InferencePhase; 162 | layerName_ = name; 163 | config_ = this->new_config(); 164 | Assert(config_ != nullptr); 165 | config_->deserialize(serialData, serialLength); 166 | config_->init(); 167 | } 168 | 169 | std::shared_ptr TRTPlugin::new_config() { 170 | return std::shared_ptr(new LayerConfig()); 171 | } 172 | 173 | int TRTPlugin::getNbOutputs() const noexcept{ 174 | return config_->num_output_; 175 | } 176 | 177 | void TRTPlugin::configurePlugin( 178 | const nvinfer1::DynamicPluginTensorDesc* in, int32_t nbInputs, 179 | const nvinfer1::DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept{ 180 | 181 | auto type = in->desc.type; 182 | auto format = in->desc.format; 183 | this->config_->usage_dtype_ = convert_trt_datatype(type); 184 | this->config_->usage_plugin_format_ = format; 185 | this->config_->num_input_ = nbInputs; 186 | this->config_->max_batch_size_ = in->max.d[0]; 187 | this->config_finish(); 188 | } 189 | 190 | int TRTPlugin::initialize() noexcept{ 191 | return 0; 192 | } 193 | 194 | void TRTPlugin::terminate() noexcept{ 195 | } 196 | 197 | bool TRTPlugin::supportsFormatCombination( 198 | int32_t pos, const nvinfer1::PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept{ 199 | 200 | bool match = config_->support_dtype_set_.find(inOut[pos].type) != config_->support_dtype_set_.end() && 201 | config_->support_plugin_format_set_.find(inOut[pos].format) != config_->support_plugin_format_set_.end(); 202 | return match; 203 | } 204 | 205 | size_t TRTPlugin::getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs, 206 | int32_t nbOutputs) const noexcept{ 207 | return config_->workspace_size_; 208 | } 209 | 210 | int32_t TRTPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 211 | const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept{ 212 | 213 | if (inputTensors_.empty()) { 214 | inputTensors_.resize(config_->num_input_); 215 | outputTensors_.resize(config_->num_output_); 216 | weightTensors_.resize(config_->weights_.size()); 217 | 218 | for (int i = 0; i < weightTensors_.size(); ++i) { 219 | auto& w = config_->weights_[i]; 220 | weightTensors_[i].shape_ = w->dims(); 221 | weightTensors_[i].ptr_ = w->gpu(); 222 | weightTensors_[i].dtype_ = w->type(); 223 | } 224 | } 225 | 226 | for (int i = 0; i < inputTensors_.size(); ++i) { 227 | inputTensors_[i].shape_ = std::vector(inputDesc[i].dims.d, inputDesc[i].dims.d+inputDesc[i].dims.nbDims); 228 | inputTensors_[i].ptr_ = (void*)inputs[i]; 229 | inputTensors_[i].dtype_ = convert_trt_datatype(inputDesc[i].type); 230 | } 231 | 232 | for (int i = 0; i < outputTensors_.size(); ++i) { 233 | outputTensors_[i].shape_ = std::vector(outputDesc[i].dims.d, outputDesc[i].dims.d+outputDesc[i].dims.nbDims); 234 | outputTensors_[i].ptr_ = outputs[i]; 235 | outputTensors_[i].dtype_ = convert_trt_datatype(outputDesc[i].type); 236 | } 237 | return enqueue(inputTensors_, outputTensors_, weightTensors_, workspace, stream); 238 | } 239 | 240 | size_t TRTPlugin::getSerializationSize() const noexcept{ 241 | return config_->serialize(); 242 | } 243 | 244 | void TRTPlugin::serialize(void* buffer) const noexcept{ 245 | config_->serialize_data_copy_to(buffer); 246 | } 247 | };// namespace Plugin -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/onnxplugin.hpp: 
-------------------------------------------------------------------------------- 1 | 2 | #ifndef ONNX_PLUGIN_HPP 3 | #define ONNX_PLUGIN_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include "plugin_binary_io.hpp" 16 | 17 | namespace ONNXPlugin { 18 | 19 | enum Phase { 20 | CompilePhase, 21 | InferencePhase 22 | }; 23 | 24 | struct GTensor { 25 | GTensor() {} 26 | GTensor(const TRT::Tensor& tensor); 27 | GTensor(float* ptr, int ndims, int* dims); 28 | GTensor(TRT::float16* ptr, int ndims, int* dims); 29 | 30 | int count(int start_axis = 0) const; 31 | 32 | template 33 | int offset(int index, _Args&& ... index_args) const{ 34 | const int index_array[] = {index, index_args...}; 35 | return offset_array(sizeof...(index_args) + 1, index_array); 36 | } 37 | 38 | int offset_array(const std::vector& index) const; 39 | int offset_array(size_t size, const int* index_array) const; 40 | 41 | inline int batch() const{return shape_[0];} 42 | inline int channel() const{return shape_[1];} 43 | inline int height() const{return shape_[2];} 44 | inline int width() const{return shape_[3];} 45 | 46 | template 47 | inline _T* ptr() const { return (_T*)ptr_; } 48 | 49 | template 50 | inline _T* ptr(int i, _Args&& ... args) const { return (_T*)ptr_ + offset(i, args...); } 51 | 52 | void* ptr_ = nullptr; 53 | TRT::DataType dtype_ = TRT::DataType::Float; 54 | std::vector shape_; 55 | }; 56 | 57 | struct LayerConfig { 58 | 59 | /////////////////////////////////// 60 | int num_output_ = 1; 61 | int num_input_ = 1; 62 | size_t workspace_size_ = 0; 63 | int max_batch_size_ = 0; 64 | std::set support_dtype_set_; 65 | std::set support_plugin_format_set_; 66 | 67 | std::vector> weights_; 68 | TRT::DataType usage_dtype_; 69 | nvinfer1::PluginFormat usage_plugin_format_; 70 | std::string info_; 71 | 72 | /////////////////////////////////// 73 | std::string serialize_data_; 74 | 75 | LayerConfig(); 76 | void serialize_data_copy_to(void* buffer); 77 | int serialize(); 78 | void deserialize(const void* ptr, size_t length); 79 | void setup(const std::string& info, const std::vector>& weights); 80 | virtual void seril(Plugin::BinIO& out) {} 81 | virtual void deseril(Plugin::BinIO& in) {} 82 | virtual void init(){} 83 | }; 84 | 85 | #define SetupPlugin(class_) \ 86 | virtual const char* getPluginType() const noexcept override{return #class_;}; \ 87 | virtual const char* getPluginVersion() const noexcept override{return "1";}; \ 88 | virtual nvinfer1::IPluginV2DynamicExt* clone() const noexcept override{return new class_(*this);} 89 | 90 | #define RegisterPlugin(class_) \ 91 | class class_##PluginCreator__ : public nvinfer1::IPluginCreator{ \ 92 | public: \ 93 | const char* getPluginName() const noexcept override{return #class_;} \ 94 | const char* getPluginVersion() const noexcept override{return "1";} \ 95 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override{return &mFieldCollection;} \ 96 | \ 97 | nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override{ \ 98 | auto plugin = new class_(); \ 99 | mFieldCollection = *fc; \ 100 | mPluginName = name; \ 101 | return plugin; \ 102 | } \ 103 | \ 104 | nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override{ \ 105 | auto plugin = new class_(); \ 106 | plugin->pluginInit(name, serialData, serialLength); \ 107 | mPluginName = name; \ 108 | 
return plugin; \ 109 | } \ 110 | \ 111 | void setPluginNamespace(const char* libNamespace) noexcept override{mNamespace = libNamespace;} \ 112 | const char* getPluginNamespace() const noexcept override{return mNamespace.c_str();} \ 113 | \ 114 | private: \ 115 | std::string mNamespace; \ 116 | std::string mPluginName; \ 117 | nvinfer1::PluginFieldCollection mFieldCollection{0, nullptr}; \ 118 | }; \ 119 | REGISTER_TENSORRT_PLUGIN(class_##PluginCreator__); 120 | 121 | class TRTPlugin : public nvinfer1::IPluginV2DynamicExt { 122 | public: 123 | virtual nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept override{return inputTypes[0];} 124 | 125 | virtual void configurePlugin( 126 | const nvinfer1::DynamicPluginTensorDesc* in, int32_t nbInputs, 127 | const nvinfer1::DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept override; 128 | 129 | virtual void attachToContext(cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept override {} 130 | virtual void detachFromContext() noexcept override {} 131 | virtual void setPluginNamespace(const char* pluginNamespace) noexcept override{this->namespace_ = pluginNamespace;}; 132 | virtual const char* getPluginNamespace() const noexcept override{return this->namespace_.data();}; 133 | 134 | virtual ~TRTPlugin(); 135 | virtual int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) = 0; 136 | 137 | void pluginInit(const std::string& name, const std::string& info, const std::vector>& weights); 138 | void pluginInit(const std::string& name, const void* serialData, size_t serialLength); 139 | virtual void config_finish() {}; 140 | 141 | virtual std::shared_ptr new_config(); 142 | virtual bool supportsFormatCombination( 143 | int32_t pos, const nvinfer1::PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; 144 | 145 | virtual int getNbOutputs() const noexcept; 146 | virtual nvinfer1::DimsExprs getOutputDimensions( 147 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept = 0; 148 | 149 | virtual int initialize() noexcept; 150 | virtual void terminate() noexcept; 151 | virtual void destroy() noexcept override{} 152 | virtual size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs, 153 | int32_t nbOutputs) const noexcept override; 154 | 155 | virtual int32_t enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 156 | const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; 157 | 158 | virtual size_t getSerializationSize() const noexcept override; 159 | virtual void serialize(void* buffer) const noexcept override; 160 | 161 | protected: 162 | std::string namespace_; 163 | std::string layerName_; 164 | Phase phase_ = CompilePhase; 165 | std::shared_ptr config_; 166 | std::vector inputTensors_; 167 | std::vector outputTensors_; 168 | std::vector weightTensors_; 169 | }; 170 | 171 | }; //namespace Plugin 172 | 173 | #endif //ONNX_PLUGIN_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugin_binary_io.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "plugin_binary_io.hpp" 3 | #include "ilogger.hpp" 4 | #include 5 | 6 
| namespace Plugin{ 7 | 8 | using namespace std; 9 | 10 | BinIO::~BinIO(){ 11 | close(); 12 | } 13 | 14 | bool BinIO::opened(){ 15 | if (flag_ == MemoryRead) 16 | return memoryRead_ != nullptr; 17 | else if (flag_ == MemoryWrite) 18 | return true; 19 | return false; 20 | } 21 | 22 | void BinIO::close(){ 23 | if (flag_ == MemoryRead) { 24 | memoryRead_ = nullptr; 25 | memoryCursor_ = 0; 26 | memoryLength_ = -1; 27 | } 28 | else if (flag_ == MemoryWrite) { 29 | memoryWrite_.clear(); 30 | memoryCursor_ = 0; 31 | memoryLength_ = -1; 32 | } 33 | } 34 | 35 | string BinIO::readData(int numBytes){ 36 | string output; 37 | output.resize(numBytes); 38 | 39 | int readlen = read((void*)output.data(), output.size()); 40 | output.resize(readlen); 41 | return output; 42 | } 43 | 44 | int BinIO::read(void* pdata, size_t length){ 45 | 46 | if (flag_ == MemoryRead) { 47 | if (memoryLength_ != -1) { 48 | 49 | if (memoryLength_ < memoryCursor_ + length) { 50 | int remain = memoryLength_ - memoryCursor_; 51 | if (remain > 0) { 52 | memcpy(pdata, memoryRead_ + memoryCursor_, remain); 53 | memoryCursor_ += remain; 54 | return remain; 55 | } 56 | else { 57 | return -1; 58 | } 59 | } 60 | } 61 | memcpy(pdata, memoryRead_ + memoryCursor_, length); 62 | memoryCursor_ += length; 63 | return length; 64 | } 65 | else { 66 | return -1; 67 | } 68 | } 69 | 70 | bool BinIO::eof(){ 71 | if (!opened()) return true; 72 | 73 | if (flag_ == MemoryRead){ 74 | return this->memoryCursor_ >= this->memoryLength_; 75 | } 76 | else if (flag_ == MemoryWrite){ 77 | return false; 78 | } 79 | else { 80 | opstate_ = false; 81 | INFO("Unsupport flag: %d", flag_); 82 | return true; 83 | } 84 | } 85 | 86 | int BinIO::write(const void* pdata, size_t length){ 87 | 88 | if (flag_ == MemoryWrite) { 89 | memoryWrite_.append((char*)pdata, (char*)pdata + length); 90 | return length; 91 | } 92 | else { 93 | return -1; 94 | } 95 | } 96 | 97 | int BinIO::writeData(const string& data){ 98 | return write(data.data(), data.size()); 99 | } 100 | 101 | BinIO& BinIO::operator >> (string& value){ 102 | //read 103 | int length = 0; 104 | (*this) >> length; 105 | value = readData(length); 106 | return *this; 107 | } 108 | 109 | int BinIO::readInt(){ 110 | int value = 0; 111 | (*this) >> value; 112 | return value; 113 | } 114 | 115 | float BinIO::readFloat(){ 116 | float value = 0; 117 | (*this) >> value; 118 | return value; 119 | } 120 | 121 | BinIO& BinIO::operator << (const string& value){ 122 | //write 123 | (*this) << (int)value.size(); 124 | writeData(value); 125 | return *this; 126 | } 127 | 128 | BinIO& BinIO::operator << (const char* value){ 129 | 130 | int length = strlen(value); 131 | (*this) << (int)length; 132 | write(value, length); 133 | return *this; 134 | } 135 | 136 | BinIO& BinIO::operator << (const vector& value){ 137 | (*this) << (int)value.size(); 138 | for (int i = 0; i < value.size(); ++i){ 139 | (*this) << value[i]; 140 | } 141 | return *this; 142 | } 143 | 144 | BinIO& BinIO::operator >> (vector& value){ 145 | int num; 146 | (*this) >> num; 147 | 148 | value.resize(num); 149 | for (int i = 0; i < value.size(); ++i) 150 | (*this) >> value[i]; 151 | return *this; 152 | } 153 | 154 | bool BinIO::openMemoryRead(const void* ptr, int memoryLength) { 155 | close(); 156 | 157 | if (!ptr) return false; 158 | memoryRead_ = (const char*)ptr; 159 | memoryCursor_ = 0; 160 | memoryLength_ = memoryLength; 161 | flag_ = MemoryRead; 162 | return true; 163 | } 164 | 165 | void BinIO::openMemoryWrite() { 166 | close(); 167 | 168 | memoryWrite_.clear(); 
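// Write mode always starts from an empty buffer: write() appends raw bytes to memoryWrite_, and the
// accumulated buffer is later handed back through writedMemory() for plugin serialization. The cursor
// and length fields below only matter in read mode and are reset here for consistency.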
169 | memoryCursor_ = 0; 170 | memoryLength_ = -1; 171 | flag_ = MemoryWrite; 172 | } 173 | 174 | }; // namespace Plugin -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugin_binary_io.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PLUGIN_BINARY_IO_HPP 2 | #define PLUGIN_BINARY_IO_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace Plugin{ 8 | 9 | class BinIO { 10 | public: 11 | enum Head { 12 | MemoryRead = 1, 13 | MemoryWrite = 2 14 | }; 15 | 16 | BinIO() { openMemoryWrite(); } 17 | BinIO(const void* ptr, int memoryLength = -1) { openMemoryRead(ptr, memoryLength); } 18 | virtual ~BinIO(); 19 | bool opened(); 20 | bool openMemoryRead(const void* ptr, int memoryLength = -1); 21 | void openMemoryWrite(); 22 | const std::string& writedMemory() { return memoryWrite_; } 23 | void close(); 24 | int write(const void* pdata, size_t length); 25 | int writeData(const std::string& data); 26 | int read(void* pdata, size_t length); 27 | std::string readData(int numBytes); 28 | int readInt(); 29 | float readFloat(); 30 | bool eof(); 31 | 32 | BinIO& operator >> (std::string& value); 33 | BinIO& operator << (const std::string& value); 34 | BinIO& operator << (const char* value); 35 | BinIO& operator << (const std::vector& value); 36 | BinIO& operator >> (std::vector& value); 37 | 38 | template 39 | BinIO& operator >> (std::vector<_T>& value) { 40 | int length = 0; 41 | (*this) >> length; 42 | 43 | value.resize(length); 44 | read(value.data(), length * sizeof(_T)); 45 | return *this; 46 | } 47 | 48 | template 49 | BinIO& operator << (const std::vector<_T>& value) { 50 | (*this) << (int)value.size(); 51 | write(value.data(), sizeof(_T) * value.size()); 52 | return *this; 53 | } 54 | 55 | template 56 | BinIO& operator >> (_T& value) { 57 | read(&value, sizeof(_T)); 58 | return *this; 59 | } 60 | 61 | template 62 | BinIO& operator << (const _T& value) { 63 | write(&value, sizeof(_T)); 64 | return *this; 65 | } 66 | 67 | bool opstate() const { 68 | return opstate_; 69 | } 70 | 71 | private: 72 | size_t readModeEndSEEK_ = 0; 73 | std::string memoryWrite_; 74 | const char* memoryRead_ = nullptr; 75 | int memoryCursor_ = 0; 76 | int memoryLength_ = -1; 77 | Head flag_ = MemoryWrite; 78 | bool opstate_ = true; 79 | }; 80 | }; // namespace Plugin 81 | 82 | #endif //PLUGIN_BINARY_IO_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugins/Project2Dto3D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include 8 | 9 | // #include 10 | enum class DeviceType: int32_t { 11 | kHOST = 0, 12 | kGPU = 1 13 | }; // emum class DeviceType 14 | 15 | using namespace ONNXPlugin; 16 | 17 | 18 | 19 | 20 | __global__ void build_LUT_kernel(int32_t n_x_voxels, int32_t n_y_voxels, int32_t n_z_voxels, 21 | float* voxel_size, float* origin, float* projection, 22 | int32_t* LUT, 23 | int32_t n_images, int32_t height, int32_t width) { 24 | 25 | int32_t idx = blockIdx.x * blockDim.x + threadIdx.x; 26 | int32_t zi = idx % n_z_voxels; 27 | idx /= n_z_voxels; 28 | int32_t yi = idx % n_y_voxels; 29 | idx /= n_y_voxels; 30 | int32_t xi = idx % n_x_voxels; 31 | idx /= n_x_voxels; 32 | int32_t img = idx; 33 | // printf("Current line: %d\n", __LINE__); 34 | if (img < n_images && LUT[(xi * n_y_voxels + yi) * n_z_voxels + zi] == -1 ) { 35 | // printf("Current line: %d\n", __LINE__); 36 | 37 
| float size_x = voxel_size[0]; 38 | // printf("Current line: %d\n", __LINE__); 39 | 40 | float size_y = voxel_size[1]; 41 | float size_z = voxel_size[2]; 42 | // printf("Current line: %d\n", __LINE__); 43 | 44 | float ar[3]; 45 | float pt[3]; 46 | // printf("Current line: %d\n", __LINE__); 47 | pt[0] = (xi - n_x_voxels / 2.0f) * size_x + origin[0]; 48 | pt[1] = (yi - n_y_voxels / 2.0f) * size_y + origin[1]; 49 | pt[2] = (zi - n_z_voxels / 2.0f) * size_z + origin[2]; 50 | 51 | // printf("Current line: %d\n", __LINE__); 52 | for (int i = 0; i < 3; ++i) { 53 | ar[i] = 0; 54 | for (int j = 0; j < 3; ++j) { 55 | ar[i] += projection[(img * 3 + i) * 4 + j] * pt[j]; 56 | } 57 | ar[i] += projection[((img * 3) + i) * 4 + 3]; 58 | } 59 | // printf("Current line: %d\n", __LINE__); 60 | int32_t x = round(ar[0] / ar[2]); 61 | int32_t y = round(ar[1] / ar[2]); 62 | float z = ar[2]; 63 | 64 | // printf("Current line: %d\n", __LINE__); 65 | bool fit_in = (x >= 0) && (y >= 0) && (x < width) && (y < height) && (z > 0); 66 | int32_t target; 67 | if (fit_in) { 68 | target = (img * height + y) * width + x; 69 | 70 | int offset = (xi * n_y_voxels + yi) * n_z_voxels + zi; // [xi,yi,zi] 71 | LUT[offset] = target; 72 | 73 | // valid[offset] = fit_in; 74 | // printf("Current line: %d\n", __LINE__); 75 | 76 | } 77 | else { 78 | target = -1; 79 | int offset = (xi * n_y_voxels + yi) * n_z_voxels + zi; // [xi,yi,zi] 80 | LUT[offset] = target; 81 | 82 | } 83 | 84 | // printf("Current line: %d\n", __LINE__); 85 | } 86 | 87 | } 88 | 89 | __global__ void backproject_LUT_kernel(float* features, int32_t* LUT, float* volume, 90 | size_t total_nrof_voxels, int32_t n_channels) { 91 | int32_t offset = blockIdx.x * blockDim.x + threadIdx.x; 92 | size_t nrof_float4_copies_per_iter = n_channels / 4; // We assume n_channels % 4 == 0 93 | if (offset < total_nrof_voxels) { 94 | int32_t target = LUT[offset]; 95 | if (target >= 0) { 96 | float4* src = (float4*)(features + target * n_channels); 97 | float4* dst = (float4*)(volume + offset * n_channels); 98 | for (size_t i = 0; i < nrof_float4_copies_per_iter; ++i) { 99 | dst[i] = src[i]; 100 | } 101 | } 102 | } 103 | } 104 | 105 | void backproject_LUT_CUDA(float* features_dev, int32_t* LUT_dev, float* volume_dev, 106 | int32_t n_images, int32_t n_channels, 107 | float* n_voxels) { 108 | // int32_t n_x_voxels = int32_t(n_voxels[0]); 109 | // int32_t n_y_voxels = int32_t(n_voxels[1]); 110 | // int32_t n_z_voxels = int32_t(n_voxels[2]); 111 | int32_t n_x_voxels = 200; 112 | int32_t n_y_voxels = 200; 113 | int32_t n_z_voxels = 4; 114 | size_t total_nrof_voxels = n_images * n_x_voxels * n_y_voxels * n_z_voxels; 115 | #define BLOCK_SIZE 1024 116 | dim3 thread_per_block(BLOCK_SIZE); 117 | dim3 block_per_grid((total_nrof_voxels + thread_per_block.x - 1) / thread_per_block.x); 118 | backproject_LUT_kernel<<< block_per_grid, thread_per_block >>>(features_dev, LUT_dev, volume_dev, 119 | total_nrof_voxels, n_channels 120 | ); 121 | } 122 | 123 | void backproject_LUT_GPU(float * features, int32_t * LUT, float* volume, 124 | float * n_voxels,int32_t n_images,int32_t n_channels) { 125 | backproject_LUT_CUDA(features, LUT, volume, 126 | n_images, n_channels, 127 | n_voxels 128 | ); 129 | } 130 | 131 | 132 | void build_LUT_cuda(float* n_voxels, float* voxel_size_dev, float* origin_dev, float* projection, 133 | int32_t* LUT, 134 | int32_t n_images, int32_t height, int32_t width) { 135 | // int32_t n_x_voxels = int32_t(n_voxels[0]); 136 | // int32_t n_y_voxels = int32_t(n_voxels[1]); 137 | // int32_t 
n_z_voxels = int32_t(n_voxels[2]); 138 | int32_t n_x_voxels = 200; 139 | int32_t n_y_voxels = 200; 140 | int32_t n_z_voxels = 4; 141 | size_t total_nrof_voxels = n_images * n_x_voxels * n_y_voxels * n_z_voxels; 142 | #define BLOCK_SIZE 1024 143 | dim3 thread_per_block(BLOCK_SIZE); 144 | dim3 block_per_grid((total_nrof_voxels + thread_per_block.x - 1) / thread_per_block.x); 145 | 146 | // printf("build here\n"); 147 | build_LUT_kernel<<< block_per_grid, thread_per_block >>>(n_x_voxels, n_y_voxels, n_z_voxels, 148 | voxel_size_dev, origin_dev, projection, 149 | LUT, 150 | n_images, height, width); 151 | } 152 | 153 | void build_LUT_GPU(float* n_voxels, float* voxel_size, float* origin, 154 | float* projection, int32_t n_images, int32_t height, int32_t width, int32_t n_channels, 155 | int32_t* LUT) { 156 | 157 | build_LUT_cuda(n_voxels, voxel_size, origin, projection, 158 | LUT, 159 | n_images, height, width 160 | ); 161 | 162 | } 163 | 164 | // Initialize the workspace; equivalent to cudaMemset(LUT, -1, lutsize*sizeof(int32_t)), but this version does not raise an error 165 | __global__ void initializeWorkspaceKernel(int32_t* workspace, size_t numElements, int32_t initValue) { 166 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 167 | 168 | if (idx < numElements) { 169 | workspace[idx] = initValue; 170 | } 171 | } 172 | void initializeWorkspace(void* workspace, size_t workspaceSize, cudaStream_t stream) { 173 | int32_t* workspaceData = static_cast<int32_t*>(workspace); 174 | size_t numElements = workspaceSize / sizeof(int32_t); 175 | const int32_t initValue = -1; 176 | 177 | // Use a block size of 1024 threads 178 | const int32_t blockSize = 1024; 179 | const int32_t numBlocks = (numElements + blockSize - 1) / blockSize; 180 | 181 | // Launch the CUDA kernel to initialize the workspace with -1 182 | initializeWorkspaceKernel<<<numBlocks, blockSize, 0, stream>>>(workspaceData, numElements, initValue); 183 | } 184 | 185 | 186 | class Project2Dto3D : public TRTPlugin { 187 | public: 188 | SetupPlugin(Project2Dto3D); 189 | 190 | virtual void config_finish() override{ 191 | } 192 | 193 | virtual std::shared_ptr<LayerConfig> new_config() override{ 194 | auto cfg = TRTPlugin::new_config(); 195 | cfg->support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 196 | 197 | return cfg; 198 | } 199 | 200 | size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int32_t nbInputs, const nvinfer1::PluginTensorDesc* outputs,int32_t nbOutputs) const noexcept 201 | { 202 | int32_t nSlices = outputs[0].dims.d[0] *outputs[0].dims.d[1] *outputs[0].dims.d[2] *outputs[0].dims.d[3]; 203 | printf("nSlices %d \n",nSlices); 204 | return nSlices * sizeof(int32_t); 205 | } 206 | 207 |
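    // Sizing sketch, assuming the fixed 200 x 200 x 4 voxel grid above and 64 feature channels as hinted
    // by getOutputDimensions() below: nSlices = 200*200*4*64 = 10,240,000 entries, so the requested LUT
    // workspace is 10,240,000 * sizeof(int32_t) = 40,960,000 bytes (about 39 MB), which matches the
    // lutsize computed in enqueue().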
208 | nvinfer1::DimsExprs getOutputDimensions( 209 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept{ 210 | nvinfer1::DimsExprs output_dims; 211 | std::vector<int32_t> n_voxels{200, 200, 4}; 212 | output_dims.nbDims = 4; 213 | output_dims.d[0] = exprBuilder.constant(n_voxels[0]); 214 | output_dims.d[1] = exprBuilder.constant(n_voxels[1]); 215 | output_dims.d[2] = exprBuilder.constant(n_voxels[2]); 216 | output_dims.d[3] = inputs[0].d[3]; //64 217 | 218 | return output_dims; 219 | } 220 | 221 | void cal_debug(float *input, int32_t size,int line,cudaStream_t stream,std::string input_name="") { 222 | 223 | std::cout << "================" << std::endl; 224 | std::cout << "Input parameter name: " << input_name << std::endl; 225 | 226 | cudaStreamSynchronize(stream); 227 | float * cal_features; 228 | cal_features = (float*)malloc(size*sizeof(float)); 229 | cudaMemcpy(cal_features, input, size*sizeof(float), cudaMemcpyDeviceToHost); 230 | 231 | std::cout.setf(std::ios::fixed,std::ios::floatfield); 232 | std::cout.precision(4); 233 | auto print_size = size>100?100:size; 234 | for(int i=0;i0? cal_features[i]:-cal_features[i]); // add the current element to the running sum 250 | if (i == 0 || cal_features[i] > max) { // if the current element is greater than the max, or it is the first element 251 | max = cal_features[i]; // update the max 252 | } 253 | if (i == 0 || cal_features[i] < min) { // if the current element is smaller than the min, or it is the first element 254 | min = cal_features[i]; // update the min 255 | } 256 | 257 | // outfile<0? cal_features[i]:-cal_features[i]); // add the current element to the running sum 298 | if (i == 0 || cal_features[i] > max) { // if the current element is greater than the max, or it is the first element 299 | max = cal_features[i]; // update the max 300 | } 301 | if (i == 0 || cal_features[i] < min) { // if the current element is smaller than the min, or it is the first element 302 | min = cal_features[i]; // update the min 303 | } 304 | 305 | // outfile< 322 | virtual int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override{ 323 | // std::chrono::high_resolution_clock::time_point t1, t2; 324 | 325 | ///////////////////////////// 326 | ///////////////////////////// 327 | // Miscellaneous initialization: grab the input features, the packed parameters and the output volume 328 | auto &features_tensor = inputs[0]; 329 | auto &param_tensor = weights[0]; 330 | auto &volume_output = outputs[0]; 331 | 332 | if (config_->usage_dtype_ == TRT::DataType::Float) { 333 | } 334 | else if (config_->usage_dtype_ == TRT::DataType::Float16) { // TODO: FP16 would require extra data conversion here, which is unreasonable; the FP16 plugin path still needs optimization 335 | return 1; 336 | INFOF("not implemented"); 337 | } 338 | 339 | int32_t n_images = features_tensor.shape_[0]; 340 | int32_t height = features_tensor.shape_[1]; 341 | int32_t width = features_tensor.shape_[2]; 342 | int32_t n_channels = features_tensor.shape_[3]; 343 | 344 | float * features = features_tensor.ptr<float>(); 345 | 346 | float * param_ = param_tensor.ptr<float>(); 347 | 348 | float * n_voxels_float = param_; 349 | float * voxel_size_tensor = n_voxels_float + 3; 350 | float * origin_tensor = voxel_size_tensor +3; 351 | float * projection_tensor = origin_tensor + 3; 352 | 353 | 354 | int32_t *LUT = (int32_t *)workspace; 355 | size_t lutsize = volume_output.shape_[0]*volume_output.shape_[1]*volume_output.shape_[2]*volume_output.shape_[3]; 356 | 357 | 358 | ///////////////////////////// 359 | // Initialize the LUT to -1 360 | initializeWorkspace(LUT, lutsize, stream); 361 | 362 | ///////////////////////////// 363 | // Build the LUT mapping table. TODO: to be optimized; the initial idea is to compute it in PyTorch and pass it in 364 | build_LUT_GPU(n_voxels_float, voxel_size_tensor, origin_tensor, projection_tensor, 365 | n_images, height, width, n_channels, LUT); 366 | 367 | 368 | ////////////////////////////////// 369 | // Back-projection 370 | backproject_LUT_GPU(features, LUT, volume_output.ptr<float>(), n_voxels_float,n_images, n_channels); 371 | 372 | 373 | return 0; 374 | } 375 | }; 376 | 377 | RegisterPlugin(Project2Dto3D); --------------------------------------------------------------------------------
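For reference, a minimal host-side sketch of the voxel-to-pixel mapping that build_LUT_kernel evaluates per thread. It assumes the same layouts as the kernel (row-major 3x4 projection matrix per image, LUT target index (img*height + y)*width + x) and is only meant to illustrate the math; the function name is hypothetical and not part of the project.

#include <cmath>
#include <cstdint>

// Illustrative CPU mirror of one build_LUT_kernel thread: returns the flattened feature
// index for voxel (xi, yi, zi) seen by image `img`, or -1 if it falls outside the image.
int32_t project_voxel_to_pixel(int32_t xi, int32_t yi, int32_t zi,
                               int32_t n_x_voxels, int32_t n_y_voxels, int32_t n_z_voxels,
                               const float* voxel_size, const float* origin,
                               const float* projection,   // n_images x 3 x 4, row-major
                               int32_t img, int32_t height, int32_t width) {
    // Voxel center in the reference frame, same formula as the kernel.
    float pt[3] = {
        (xi - n_x_voxels / 2.0f) * voxel_size[0] + origin[0],
        (yi - n_y_voxels / 2.0f) * voxel_size[1] + origin[1],
        (zi - n_z_voxels / 2.0f) * voxel_size[2] + origin[2],
    };

    // ar = P[img] * [pt; 1], with P stored as 3x4 rows.
    float ar[3];
    for (int i = 0; i < 3; ++i) {
        ar[i] = projection[(img * 3 + i) * 4 + 3];        // translation column
        for (int j = 0; j < 3; ++j)
            ar[i] += projection[(img * 3 + i) * 4 + j] * pt[j];
    }

    // Perspective divide and bounds check, matching the kernel's fit_in test.
    int32_t x = (int32_t)std::round(ar[0] / ar[2]);
    int32_t y = (int32_t)std::round(ar[1] / ar[2]);
    bool fit_in = x >= 0 && y >= 0 && x < width && y < height && ar[2] > 0.0f;
    return fit_in ? (img * height + y) * width + x : -1;
}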
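And a minimal round-trip sketch of how LayerConfig uses Plugin::BinIO above: values streamed with operator<< into the in-memory write buffer come back in the same order with operator>> when the engine is deserialized. The variable names and values are illustrative; only BinIO calls that appear in the sources are used.

#include <string>
#include "plugin_binary_io.hpp"

// Illustrative only: mimics the ordering discipline of LayerConfig::serialize()/deserialize().
static std::string pack_example() {
    Plugin::BinIO out;                                 // default ctor opens memory-write mode
    size_t workspace_size = 1024;
    int max_batch = 4;
    std::string info = "{\"n_voxels\":[200,200,4]}";   // hypothetical info string
    out << workspace_size << max_batch << info;        // strings are written as length + bytes
    return out.writedMemory();                         // buffer later copied out by serialize_data_copy_to()
}

static void unpack_example(const std::string& blob) {
    Plugin::BinIO in(blob.data(), (int)blob.size());   // memory-read mode over the serialized bytes
    size_t workspace_size = 0;
    int max_batch = 0;
    std::string info;
    in >> workspace_size >> max_batch >> info;         // must match the write order exactly
}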