├── .gitignore ├── Makefile ├── README.cn.md ├── README.md ├── README.onnx.plugin.md ├── README.onnx.plugin.pdf ├── TensorRT.sln ├── TensorRT.vcxproj ├── TensorRT.vcxproj.filters ├── TensorRT.vcxproj.user ├── dbface └── DBFace.py ├── dcn_onnx ├── DCNv2 │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── dcn_v2.py │ ├── make.sh │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcn_v2_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── dcn_v2_cuda.cu │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ └── vision.h │ │ ├── dcn_v2.h │ │ └── vision.cpp │ └── test.py ├── README.md ├── dcn_v2.py ├── dladcn_export_onnx.py └── pose_dla_dcn.py ├── lean ├── .gitignore └── README.md ├── plugin_onnx_export.py ├── scripts ├── getALL.sh ├── getCenterTrack.sh ├── getDBFace.sh └── getDLADCN.sh ├── src ├── builder │ ├── trt_builder.cpp │ └── trt_builder.hpp ├── caffeplugin │ ├── caffeplugin.cpp │ ├── caffeplugin.hpp │ └── plugins │ │ ├── ChannelMultiplicationLayer.cu │ │ ├── ChannelMultiplicationLayer.hpp │ │ ├── ClipLayer.cu │ │ ├── ClipLayer.hpp │ │ ├── DCNLayer.cu │ │ ├── DCNLayer.hpp │ │ ├── PlexShuffleLayer.cu │ │ ├── PlexShuffleLayer.hpp │ │ ├── TestPlugin.cu │ │ └── TestPlugin.hpp ├── common │ ├── cc_util.cpp │ ├── cc_util.hpp │ ├── json.cpp │ ├── json.hpp │ ├── trt_common.cpp │ └── trt_common.hpp ├── examples │ ├── center_net_coco2x_dcn.cpp │ ├── center_track_coco_tracking.cpp │ ├── dbface.cpp │ └── onnx.cpp ├── import_lib.cpp ├── infer │ ├── ct_detect_backend.cu │ ├── ct_detect_backend.hpp │ ├── dbface_backend.cu │ ├── dbface_backend.hpp │ ├── task_pool.hpp │ ├── trt_backend.cpp │ ├── trt_backend.hpp │ ├── trt_infer.cpp │ ├── trt_infer.hpp │ └── trt_infer_norm.cu ├── main.cpp ├── onnx │ ├── onnx-operators_ONNX_NAMESPACE-ml.pb.cpp │ ├── onnx-operators_ONNX_NAMESPACE-ml.pb.h │ ├── onnx_ONNX_NAMESPACE-ml.pb.cpp │ ├── onnx_ONNX_NAMESPACE-ml.pb.h │ ├── onnx_pb.h │ └── onnxifi.h ├── onnx_parser │ ├── ImporterContext.hpp │ ├── InstanceNormalization.cpp │ ├── InstanceNormalization.hpp │ ├── ModelImporter.cpp │ ├── ModelImporter.hpp │ ├── NvOnnxParser.cpp │ ├── NvOnnxParser.h │ ├── NvOnnxParserTypedefs.h │ ├── OnnxAttrs.cpp │ ├── OnnxAttrs.hpp │ ├── ResizeNearest.cu │ ├── ResizeNearest.hpp │ ├── ShapedWeights.cpp │ ├── ShapedWeights.hpp │ ├── Split.cu │ ├── Split.hpp │ ├── Status.hpp │ ├── TensorOrWeights.hpp │ ├── builtin_op_importers.cpp │ ├── builtin_op_importers.hpp │ ├── common.hpp │ ├── onnx2trt.hpp │ ├── onnx2trt_common.hpp │ ├── onnx2trt_utils.cpp │ ├── onnx2trt_utils.hpp │ ├── onnx_utils.hpp │ ├── plugin.cpp │ ├── plugin.hpp │ ├── plugin_common.hpp │ ├── serialize.hpp │ ├── toposort.hpp │ ├── trt_utils.hpp │ └── utils.hpp └── onnxplugin │ ├── onnxplugin.cpp │ ├── onnxplugin.hpp │ └── plugins │ ├── DCNv2.cu │ ├── DCNv2.hpp │ ├── HSigmoid.cu │ ├── HSigmoid.hpp │ ├── HSwish.cu │ ├── HSwish.hpp │ ├── MReLU.cu │ └── MReLU.hpp └── workspace ├── imgs ├── 000020.jpg ├── 000023.jpg ├── 17790319373_bd19b24cfc_k.jpg ├── selfie.jpg └── www.jpg ├── logs ├── 2020-04-15.log └── 2020-04-17.log ├── models ├── .gitignore └── demo.onnx └── results ├── 0.centernet.coco2x.dcn.jpg ├── 1.centernet.coco2x.dcn.jpg ├── coco.tracking.jpg └── selfie.draw.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | **/.trtmodel 2 | /build 3 | .vs 4 | **/*.pdb 5 | **/*.exe 6 | **/*.ilk 7 | **/*.suo 8 | objs -------------------------------------------------------------------------------- /Makefile: 
-------------------------------------------------------------------------------- 1 | ECHO = @echo 2 | OUTNAME = trtrun 3 | CC := g++ 4 | CUCC := nvcc 5 | SRCDIR := src 6 | OBJDIR := objs 7 | LEAN := /datav/newbb/lean 8 | #BINDIR := $(LEAN)/tensorRTIntegrate 9 | BINDIR := workspace 10 | 11 | TENSORRT_NAME := TensorRT-7.0.0.11 12 | #TENSORRT_NAME := TensorRT-6.0.1.8-cuda10.2-cudnn7.6 13 | CFLAGS := -std=c++11 -fPIC -m64 -g -O3 -fopenmp -w -DONNX_ML -DNDEBUG 14 | CUFLAGS := -std=c++11 -m64 -Xcompiler -fPIC -g -O3 -w -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_61,code=sm_61 15 | INC_OPENCV := $(LEAN)/opencv4.2.0/include/opencv4 16 | INC_LOCAL := ./src ./src/builder ./src/common ./src/infer ./src/plugin ./src/plugin/plugins 17 | INC_SYS := /usr/local/protobuf/include 18 | INC_CUDA := $(LEAN)/cuda10.2/include $(LEAN)/$(TENSORRT_NAME)/include $(LEAN)/cudnn7.6.5.32-cuda10.2 19 | INCS := $(INC_SYS) $(INC_OPENCV) $(INC_LOCAL) $(INC_CUDA) 20 | INCS := $(foreach inc, $(INCS), -I$(inc)) 21 | 22 | LIB_CUDA := $(LEAN)/cuda10.2/lib $(LEAN)/$(TENSORRT_NAME)/lib $(LEAN)/cudnn7.6.5.32-cuda10.2 23 | LIB_SYS := /usr/local/protobuf/lib 24 | LIB_OPENCV := $(LEAN)/opencv4.2.0/lib 25 | LIBS := $(LIB_SYS) $(LIB_CUDA) $(LIB_OPENCV) 26 | LIBS := $(foreach lib, $(LIBS),-L$(lib)) 27 | 28 | RPATH := $(LIB_SYS) $(LIB_CUDA) $(LIB_OPENCV) 29 | RPATH := $(foreach lib, $(RPATH),-Wl,-rpath=$(lib)) 30 | 31 | LD_OPENCV := opencv_core opencv_highgui opencv_imgproc opencv_video opencv_videoio opencv_imgcodecs 32 | LD_NVINFER := nvinfer nvinfer_plugin nvparsers 33 | LD_CUDA := cuda curand cublas cudart cudnn 34 | LD_SYS := stdc++ 35 | LDS := $(LD_SYS) $(LD_OPENCV) $(LD_NVINFER) $(LD_CUDA) 36 | LDS := $(foreach lib, $(LDS), -l$(lib)) 37 | 38 | SRCS := $(shell cd $(SRCDIR) && find -name "*.cpp") 39 | OBJS := $(patsubst %.cpp,%.o,$(SRCS)) 40 | OBJS := $(foreach item,$(OBJS),$(OBJDIR)/$(item)) 41 | CUS := $(shell cd $(SRCDIR) && find -name "*.cu") 42 | CUOBJS := $(patsubst %.cu,%.o,$(CUS)) 43 | CUOBJS := $(foreach item,$(CUOBJS),$(OBJDIR)/$(item)) 44 | OBJS := $(subst /./,/,$(OBJS)) 45 | CUOBJS := $(subst /./,/,$(CUOBJS)) 46 | 47 | all: $(BINDIR)/$(OUTNAME) 48 | $(ECHO) Done, now you can run this program with \"make run\" command. 49 | 50 | run: all 51 | @cd $(BINDIR) && ./$(OUTNAME) 52 | 53 | $(BINDIR)/$(OUTNAME): $(OBJS) $(CUOBJS) 54 | $(ECHO) Linking: $@ 55 | @g++ $(LIBS) -o $@ $^ $(LDS) $(RPATH) -pthread -lprotobuf 56 | 57 | $(CUOBJS) : $(OBJDIR)/%.o : $(SRCDIR)/%.cu 58 | @if [ ! -d $@ ]; then mkdir -p $(dir $@); fi 59 | $(ECHO) Compiling: $< 60 | @$(CUCC) $(CUFLAGS) $(INCS) -c -o $@ $< 61 | 62 | $(OBJS) : $(OBJDIR)/%.o : $(SRCDIR)/%.cpp 63 | @if [ ! -d $@ ]; then mkdir -p $(dir $@); fi 64 | $(ECHO) Compiling: $< 65 | @$(CC) $(CFLAGS) $(INCS) -c -o $@ $< 66 | 67 | clean: 68 | rm -rf $(OBJDIR) $(BINDIR)/$(OUTNAME) 69 | -------------------------------------------------------------------------------- /README.cn.md: -------------------------------------------------------------------------------- 1 | # TensorRT 2 | 3 | --- 4 | * 1、支持OnnX的插件开发,并且实现[CenterNet](https://github.com/xingyizhou/CenterNet)的DCNv2插件demo(fp32/fp16)和Inference实现,附有案例 5 | * 2、不建议使用pytorch->caffemodel->tensorRT,改用pytorch->onnx->tensorRT,对于任何特定需求(例如dcn、例如双线性插值),可以用插件实现 6 | * 3、如果不用这里提供的框架,自己实现onnx插件,这里有[一份指导](README.onnx.plugin.md),说明了关键点,可以做参考 7 | * 4、视频讲解点击这里:https://www.bilibili.com/video/BV1Pe411x7qr 8 | 9 | 10 | ## 复现centerNetDCN的检测结果 11 | ![image1](workspace/centernet.coco2x.dcn.17790319373_bd19b24cfc_k.jpg) 12 | 13 |
14 | 15 | ## Reproducing the CenterTrack results 16 | 17 | 18 | 19 | ![image1](workspace/coco.tracking.jpg) 20 | 21 | <br/>
22 | 23 | ## Reproducing DBFace 24 | 25 | ![dbface](workspace/selfie.draw.jpg) 26 | 27 | 28 | 29 | 30 | ## Quick Start 31 | * Install protobuf v3.8.x; [README.onnx.plugin.md](README.onnx.plugin.md) explains how to install it 32 | ```bash 33 | bash getDLADCN.sh 34 | make run -j32 35 | ``` 36 | 37 | --- 38 | ## Example - Inference 39 | ``` 40 | auto engine = TRTInfer::loadEngine("models/efficientnet-b0.fp32.trtmodel"); 41 | float mean[3] = {0.485, 0.456, 0.406}; 42 | float std[3] = {0.229, 0.224, 0.225}; 43 | Mat image = imread("img.jpg"); 44 | engine->input()->setNormMat(0, image, mean, std); 45 | engine->forward(); 46 | engine->output(0)->print(); 47 | ``` 48 | 49 | ## Environment - Windows 50 | * tensorRT7.0.0.11 (switching to version 6 or another version may require minor changes) 51 | * opencv3.4.6 (any other version can be substituted) 52 | * cudnn7.6.3 (any other version can be substituted) 53 | * cuda10.0 (any other version can be substituted) 54 | * protobuf v3.8.x 55 | * Visual Studio 2017 (other versions can open the project, but the matching opencv version must be configured) 56 | * If you want to change versions, download cuda/cudnn/tensorRT builds that match each other, because they depend on one another; otherwise, staying on cuda10.0 makes this project easy to compile as-is 57 | * Windows dependency libraries: [lean.zip download](http://zifuture.com:1000/fs/25.shared/lean.zip) 58 | --- 59 | 60 | 61 | ## Environment - Linux 62 | * protobuf v3.8.x 63 | * cuda10.2 (any other version can be substituted) 64 | * cudnn7.6.5.32-cuda10.2 (any other version can be substituted) 65 | * opencv4.2.0 (any other version can be substituted) 66 | * TensorRT-7.0.0.11 (switching to version 6 or another version may require minor changes) 67 | --- 68 | 69 | 70 | ## Notes 71 | * PyTorch to ONNX (custom autograd.Function implementations are exported as plugin ops): see [plugin_onnx_export.py](plugin_onnx_export.py) 72 | * For the ONNX plugin MReLU see [MReLU.cu](src/onnxplugin/plugins/MReLU.cu), and for HSwish see [HSwish.cu](src/onnxplugin/plugins/HSwish.cu) 73 | * The plugins under src/plugin implement the caffemodel plugin mechanism; they are not compatible with ONNX and are not recommended 74 | * int8 support no longer works; if you need int8, use [the earlier version together with tensorRT6.0](https://github.com/dlunion/tensorRTIntegrate/tree/59e933efc8011bc304d3ccd9fdd1d6cbc7b2e9a0) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This version is deprecated; for the latest version, please move to 2 | - https://github.com/shouxieai/tensorRT_cpp 3 | - The new version supports the latest tensorRT and yolov5, uses a new parser, and produces fewer model-compilation errors 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | # YoloV5 Support 12 | http://zifuture.com:1556/fs/16.std/release_tensorRT_yolov5.zip 13 | 14 | 15 | # TensorRT-Integrate 16 | 17 | 1. Support pytorch onnx plugin (DCN, HSwish ... etc.) 18 | 2. Simpler inference and plugin APIs 19 | 20 | <br/>
21 | 22 | 23 | ## Re-implement 24 | ##### [CenterNet : ctdet_coco_dla_2x](https://github.com/xingyizhou/CenterNet) 25 | 26 | ![image1](workspace/results/1.centernet.coco2x.dcn.jpg) 27 | 28 |
29 | 30 | ##### [CenterTrack: coco_tracking](https://github.com/xingyizhou/CenterTrack) 31 | 32 | ![coco.tracking.jpg](workspace/results/coco.tracking.jpg) 33 | 34 | * [coco_tracking.onnx download](http://zifuture.com:1556/fs/public_models/coco_tracking.onnx) 35 | 36 | * [nuScenes_3Dtracking.onnx download](http://zifuture.com:1556/fs/public_models/nuScenes_3Dtracking.onnx) 37 | 38 |
38 | 39 | 40 | ##### [DBFace](https://github.com/dlunion/DBFace) 41 | 42 | ![selfie.draw.jpg](workspace/results/selfie.draw.jpg) 43 | 44 | 45 | 46 | ## Use TensorRT-Integrate 47 | 48 | Install protobuf == 3.11.4 (any version >= 3.8.x also works, but is more troublesome to set up) 49 | 50 | ```bash 51 | bash scripts/getALL.sh 52 | make run -j32 53 | ``` 54 | 55 | <br/>
56 | 57 | ## Inference Code 58 | 59 | ``` 60 | auto engine = TRTInfer::loadEngine("models/efficientnet-b0.fp32.trtmodel"); 61 | float mean[3] = {0.485, 0.456, 0.406}; 62 | float std[3] = {0.229, 0.224, 0.225}; 63 | Mat image = imread("img.jpg"); 64 | auto input = engine->input(); 65 | 66 | // multi batch sample 67 | input->resize(2); 68 | input->setNormMatGPU(0, image, mean, std); 69 | input->setNormMatGPU(1, image, mean, std); 70 | 71 | engine->forward(); 72 | 73 | // get result and copy to cpu 74 | engine->output(0)->cpu(); 75 | engine->tensor("hm")->cpu(); 76 | ``` 77 | 78 |
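The `.trtmodel` engine loaded above has to be produced first; that is the job of the builder API declared in `src/builder/trt_builder.hpp` (`TRTBuilder::compileTRT`), which accepts either a caffe or an ONNX model. A minimal sketch follows; the ONNX path, the `"hm"` output name, the batch size and the 3x224x224 input dims are placeholders, and the output list is assumed to take output blob names as strings.

```cpp
#include "builder/trt_builder.hpp"

int main() {
    TRTBuilder::setDevice(0);   // select the GPU to compile on

    // Compile an ONNX file into the serialized engine consumed by TRTInfer::loadEngine().
    // TRTMode_FP16 can be used instead of TRTMode_FP32 on GPUs that support it.
    bool ok = TRTBuilder::compileTRT(
        TRTBuilder::TRTMode_FP32,
        {"hm"},                                                   // placeholder output blob name(s)
        4,                                                        // max batch size baked into the engine
        TRTBuilder::ModelSource("models/efficientnet-b0.onnx"),   // placeholder ONNX path
        "models/efficientnet-b0.fp32.trtmodel",                   // where the engine is written
        {TRTBuilder::InputDims(3, 224, 224)});                    // optional input dims setup, placeholder values
    return ok ? 0 : 1;
}
```

Once compiled, the engine file is loaded and run exactly as in the snippet above.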
79 | 80 | ## Environment 81 | 82 | * tensorRT7.0 or tensorRT6.0 83 | * opencv3.4.6 84 | * cudnn7.6.3 85 | * cuda10.0 86 | * protobuf v3.8.x 87 | * Visual Studio 2017 88 | * [lean-windows.zip (include tensorRT、opencv、cudnn、cuda、protobuf)](http://zifuture.com:1556/fs/25.shared/lean.zip) 89 | 90 |
91 | 92 | ## Plugin 93 | 94 | 1. Pytorch export ONNX: [plugin_onnx_export.py](plugin_onnx_export.py) 95 | 2. [MReLU.cu](src/onnxplugin/plugins/MReLU.cu) 、[HSwish.cu](src/onnxplugin/plugins/HSwish.cu)、[DCNv2.cu](src/onnxplugin/plugins/DCNv2.cu) 96 | 97 |
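Models whose graphs contain these plugin ops are compiled and executed through the same builder/inference API as plain ONNX files; the ONNX parser hands the custom nodes to the plugins under `src/onnxplugin/plugins`. Below is a hedged sketch for the DLA-DCN CenterNet model downloaded by `scripts/getDLADCN.sh`; the output names come from `dcn_onnx/dladcn_export_onnx.py` and the mean/std values from `src/examples/center_net_coco2x_dcn.cpp`, while the 512x512 input resolution and FP32 mode are assumptions, and the letterbox preprocessing and box decoding done in that example are omitted.

```cpp
#include <opencv2/opencv.hpp>
#include "builder/trt_builder.hpp"
#include "infer/trt_infer.hpp"

int main() {
    // dladcnv2.onnx contains DCN nodes that the ONNX parser maps onto the DCNv2 plugin.
    TRTBuilder::compileTRT(
        TRTBuilder::TRTMode_FP32,
        {"hm", "wh", "reg", "hm_pool"},                  // output names used by dladcn_export_onnx.py
        1,
        TRTBuilder::ModelSource("models/dladcnv2.onnx"),
        "models/dladcnv2.fp32.trtmodel",
        {TRTBuilder::InputDims(3, 512, 512)});           // assumed working resolution

    auto engine = TRTInfer::loadEngine("models/dladcnv2.fp32.trtmodel");
    float mean[3] = {0.40789654, 0.44719302, 0.47026115};   // CenterNet normalization from the example
    float std[3] = {0.28863828, 0.27408164, 0.27809835};
    cv::Mat image = cv::imread("imgs/www.jpg");
    engine->input()->setNormMatGPU(0, image, mean, std);    // the real example letterboxes with warpAffine first
    engine->forward();
    engine->tensor("hm")->cpu();                             // center heatmap; decoding lives in src/infer/ct_detect_backend.cu
    return 0;
}
```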
98 | -------------------------------------------------------------------------------- /README.onnx.plugin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/README.onnx.plugin.pdf -------------------------------------------------------------------------------- /TensorRT.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.136 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TensorRT", "TensorRT.vcxproj", "{FBF775F5-DAB4-4BC1-97A9-D36301073438}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {FBF775F5-DAB4-4BC1-97A9-D36301073438}.Debug|x64.ActiveCfg = Debug|x64 15 | {FBF775F5-DAB4-4BC1-97A9-D36301073438}.Debug|x64.Build.0 = Debug|x64 16 | {FBF775F5-DAB4-4BC1-97A9-D36301073438}.Release|x64.ActiveCfg = Release|x64 17 | {FBF775F5-DAB4-4BC1-97A9-D36301073438}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {679F35F0-20AA-4D18-8610-D369E2BE97E8} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /TensorRT.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | workspace 5 | PATH=$(projectDir)lean/cuda10.0/bin;$(projectDir)lean/opencv3.4.6/lib;$(projectDir)lean/cudnn7.6.3;$(projectDir)lean/TensorRT-7.0.0.11/lib 6 | WindowsLocalDebugger 7 | false 8 | 9 | 10 | workspace 11 | PATH=$(projectDir)lean/cuda10.0/bin;$(projectDir)lean/opencv3.4.6/lib;$(projectDir)lean/cudnn7.6.3;$(projectDir)lean/TensorRT-7.0.0.11/lib 12 | WindowsLocalDebugger 13 | false 14 | 15 | -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.0 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # mdformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | ### Note 42 | Now the master branch is for pytorch 1.0 (new ATen API), you can switch back to pytorch 0.4 with, 43 | ```bash 44 | git checkout pytorch_0.4 45 | ``` 46 | 47 | ### Known Issues: 48 | 49 | - [x] Gradient check w.r.t offset (solved) 50 | - [ ] Backward is not reentrant (minor) 51 | 52 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 53 | 54 | I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes. 55 | However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some 56 | non-differential points? 57 | 58 | Update: all gradient check passes with double precision. 59 | 60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 61 | float `<1e-15` for double), 62 | so it may not be a serious problem (?) 63 | 64 | Please post an issue or PR if you have any comments. 
65 | -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/dcn_onnx/DCNv2/__init__.py -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "src") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | extra_compile_args = {"cxx": []} 28 | define_macros = [] 29 | 30 | if torch.cuda.is_available() and CUDA_HOME is not None: 31 | extension = CUDAExtension 32 | sources += source_cuda 33 | define_macros += [("WITH_CUDA", None)] 34 | extra_compile_args["nvcc"] = [ 35 | "-DCUDA_HAS_FP16=1", 36 | "-D__CUDA_NO_HALF_OPERATORS__", 37 | "-D__CUDA_NO_HALF_CONVERSIONS__", 38 | "-D__CUDA_NO_HALF2_OPERATORS__", 39 | ] 40 | else: 41 | raise NotImplementedError('Cuda is not availabel') 42 | 43 | sources = [os.path.join(extensions_dir, s) for s in sources] 44 | include_dirs = [extensions_dir, "/usr/local/cuda-10.1/include"] 45 | ext_modules = [ 46 | extension( 47 | "_ext", 48 | sources, 49 | include_dirs=include_dirs, 50 | define_macros=define_macros, 51 | extra_compile_args=extra_compile_args, 52 | ) 53 | ] 54 | return ext_modules 55 | 56 | setup( 57 | name="DCNv2", 58 | version="0.1", 59 | author="charlesshang", 60 | url="https://github.com/charlesshang/DCNv2", 61 | description="deformable convolutional networks", 62 | packages=find_packages(exclude=("configs", "tests",)), 63 | # install_requires=requirements, 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 66 | ) -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | 7 | at::Tensor 8 | dcn_v2_cpu_forward(const at::Tensor &input, 9 | const at::Tensor &weight, 10 | const at::Tensor &bias, 11 | const at::Tensor &offset, 12 | const at::Tensor &mask, 13 | const int kernel_h, 14 | const int kernel_w, 15 | const int stride_h, 16 | const int stride_w, 17 | const int pad_h, 18 | const int pad_w, 19 | const int dilation_h, 20 | const int dilation_w, 21 | const int deformable_group) 22 | { 23 | AT_ERROR("Not implement on cpu"); 24 | } 25 | 26 | std::vector 27 | 
dcn_v2_cpu_backward(const at::Tensor &input, 28 | const at::Tensor &weight, 29 | const at::Tensor &bias, 30 | const at::Tensor &offset, 31 | const at::Tensor &mask, 32 | const at::Tensor &grad_output, 33 | int kernel_h, int kernel_w, 34 | int stride_h, int stride_w, 35 | int pad_h, int pad_w, 36 | int dilation_h, int dilation_w, 37 | int deformable_group) 38 | { 39 | AT_ERROR("Not implement on cpu"); 40 | } 41 | 42 | std::tuple 43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 44 | const at::Tensor &bbox, 45 | const at::Tensor &trans, 46 | const int no_trans, 47 | const float spatial_scale, 48 | const int output_dim, 49 | const int group_size, 50 | const int pooled_size, 51 | const int part_size, 52 | const int sample_per_part, 53 | const float trans_std) 54 | { 55 | AT_ERROR("Not implement on cpu"); 56 | } 57 | 58 | std::tuple 59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 60 | const at::Tensor &input, 61 | const at::Tensor &bbox, 62 | const at::Tensor &trans, 63 | const at::Tensor &top_count, 64 | const int no_trans, 65 | const float spatial_scale, 66 | const int output_dim, 67 | const int group_size, 68 | const int pooled_size, 69 | const int part_size, 70 | const int sample_per_part, 71 | const float trans_std) 72 | { 73 | AT_ERROR("Not implement on cpu"); 74 | } -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 
58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const 
at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector 42 | dcn_v2_backward(const at::Tensor &input, 43 | const at::Tensor &weight, 44 | const at::Tensor &bias, 45 | const at::Tensor &offset, 46 | const at::Tensor &mask, 47 | const at::Tensor &grad_output, 48 | int kernel_h, int kernel_w, 49 | int stride_h, int stride_w, 50 | int pad_h, int pad_w, 51 | int dilation_h, int dilation_w, 52 | int deformable_group) 53 | { 54 | if (input.type().is_cuda()) 55 | { 56 | #ifdef WITH_CUDA 57 | return dcn_v2_cuda_backward(input, 58 | weight, 59 | bias, 60 | offset, 61 | mask, 62 | grad_output, 63 | kernel_h, kernel_w, 64 | stride_h, stride_w, 65 | pad_h, pad_w, 66 | dilation_h, dilation_w, 67 | deformable_group); 68 | #else 69 | AT_ERROR("Not compiled with GPU support"); 70 | #endif 71 | } 72 | AT_ERROR("Not implemented on the CPU"); 73 | } 74 | 75 | std::tuple 76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 77 | const at::Tensor &bbox, 78 | const at::Tensor &trans, 79 | const int no_trans, 80 | const float spatial_scale, 81 | const int output_dim, 82 | const int group_size, 83 | const int pooled_size, 84 | const int part_size, 85 | const int sample_per_part, 86 | const float trans_std) 87 | { 88 | if (input.type().is_cuda()) 89 | { 90 | #ifdef WITH_CUDA 91 | return dcn_v2_psroi_pooling_cuda_forward(input, 92 | bbox, 93 | trans, 94 | no_trans, 95 | spatial_scale, 96 | output_dim, 97 | group_size, 98 | pooled_size, 99 | part_size, 100 | sample_per_part, 101 | trans_std); 102 | #else 103 | AT_ERROR("Not compiled with GPU support"); 104 | #endif 105 | } 106 | AT_ERROR("Not implemented on the CPU"); 107 | } 108 | 109 | std::tuple 110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 111 | const at::Tensor &input, 112 | const at::Tensor &bbox, 113 | const at::Tensor &trans, 114 | const at::Tensor &top_count, 115 | const int no_trans, 116 | const float spatial_scale, 117 | const int output_dim, 118 | const int group_size, 119 | const int pooled_size, 120 | const int part_size, 121 | const int sample_per_part, 122 | const float trans_std) 123 | { 124 | if (input.type().is_cuda()) 125 | { 126 | #ifdef WITH_CUDA 127 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 128 | input, 129 | 
bbox, 130 | trans, 131 | top_count, 132 | no_trans, 133 | spatial_scale, 134 | output_dim, 135 | group_size, 136 | pooled_size, 137 | part_size, 138 | sample_per_part, 139 | trans_std); 140 | #else 141 | AT_ERROR("Not compiled with GPU support"); 142 | #endif 143 | } 144 | AT_ERROR("Not implemented on the CPU"); 145 | } -------------------------------------------------------------------------------- /dcn_onnx/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /dcn_onnx/README.md: -------------------------------------------------------------------------------- 1 | ## 说明 2 | 1. 下载官方的代码 3 | 2. 下载模型 4 | 3. 替换这里的几个py(请着重看一下这几个py里面,用==================修改的部分===================做的标记,没有修改太多,可以自己尝试改下) 5 | 4. 执行dladcn_export_onnx.py,导出onnx即可 -------------------------------------------------------------------------------- /dcn_onnx/dladcn_export_onnx.py: -------------------------------------------------------------------------------- 1 | 2 | # 请下载官方的代码,然后执行这个就可以生成了 3 | import numpy as np 4 | import torch 5 | import torch.onnx.utils as onnx 6 | import models.networks.pose_dla_dcn as net 7 | from collections import OrderedDict 8 | import cv2 9 | 10 | model = net.get_pose_net(num_layers=34, heads={'hm': 80, 'wh': 2, 'reg': 2}) 11 | 12 | # https://github.com/xingyizhou/CenterNet/blob/master/readme/MODEL_ZOO.md 这里下载的 13 | # 如果下载不了,可以尝试我提供的连接:http://zifuture.com:1000/fs/public_models/ctdet_coco_dla_2x.pth 14 | checkpoint = torch.load(r"ctdet_coco_dla_2x.pth", map_location="cpu") 15 | checkpoint = checkpoint["state_dict"] 16 | change = OrderedDict() 17 | for key, op in checkpoint.items(): 18 | change[key.replace("module.", "", 1)] = op 19 | 20 | model.load_state_dict(change) 21 | model.eval() 22 | model.cuda() 23 | 24 | input = torch.zeros((1, 3, 32, 32)).cuda() 25 | 26 | # 有个已经导出好的模型:http://zifuture.com:1000/fs/public_models/dladcnv2.onnx 27 | onnx.export(model, (input), "dladcnv2.onnx", output_names=["hm", "wh", "reg", "hm_pool"], verbose=True) 28 | -------------------------------------------------------------------------------- /lean/.gitignore: -------------------------------------------------------------------------------- 1 | /tensorRT6.0.1.5 2 | /cuda10.0 3 | /cudnn7.6.3 4 | /opencv3.4.6 5 | /TensorRT-7.0.0.11 6 | /protobuf3.11.4 -------------------------------------------------------------------------------- /lean/README.md: -------------------------------------------------------------------------------- 1 | # Lean 2 | 3 | 编译好的protobuf Windows版本:http://zifuture.com:1000/fs/25.shared/lean.zip -------------------------------------------------------------------------------- /plugin_onnx_export.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | import json 5 | 6 | # 一个内置插件 7 | class HSwishImplementation(torch.autograd.Function): 8 | 9 | @staticmethod 10 | def symbolic(g, input, bias): 11 | return g.op("HSwish", input, bias, 
info_s="string attribute", kernel_size_i=3, eps_f=3e-2) 12 | 13 | @staticmethod 14 | def forward(ctx, i, bias): 15 | ctx.save_for_backward(i) 16 | return i * F.relu6(i + 3) / 6 + bias 17 | 18 | class MemoryEfficientHSwish(nn.Module): 19 | def __init__(self, shape): 20 | super(MemoryEfficientHSwish, self).__init__() 21 | self.bias = nn.Parameter(torch.zeros(shape)) 22 | self.bias.data.fill_(3.15) 23 | 24 | def forward(self, x): 25 | return HSwishImplementation.apply(x, self.bias) 26 | 27 | 28 | # 一个通过本框架实现的插件 29 | class MReLUImplementation(torch.autograd.Function): 30 | 31 | @staticmethod 32 | def symbolic(g, input, bias): 33 | return g.op("Plugin", input, bias, name_s="MReLU", info_s=json.dumps({ 34 | "kernel_size": 3, 35 | "eps": 3e-2, 36 | "other": "Hello Onnx Plugin" 37 | })) 38 | 39 | @staticmethod 40 | def forward(ctx, i, bias): 41 | ctx.save_for_backward(i) 42 | return F.relu(i) + bias 43 | 44 | class MReLU(nn.Module): 45 | def __init__(self, *shape): 46 | super(MReLU, self).__init__() 47 | self.bias = nn.Parameter(torch.zeros(shape)) 48 | self.bias.data.fill_(0.5) 49 | 50 | def forward(self, x): 51 | return MReLUImplementation.apply(x, self.bias) 52 | 53 | class FooModel(torch.nn.Module): 54 | def __init__(self): 55 | super(FooModel, self).__init__() 56 | self.hswish = MemoryEfficientHSwish(1) 57 | self.mrelu = MReLU(1) 58 | 59 | def forward(self, input1, input2): 60 | return self.mrelu(input2) + self.hswish(input1) 61 | 62 | dummy_input1 = torch.zeros((1, 3, 3, 3)) 63 | dummy_input2 = torch.zeros((1, 3, 3, 3)) 64 | model = FooModel() 65 | 66 | dummy_input1[...] = 0.25 67 | dummy_input2[...] = 0 68 | out = model(dummy_input1, dummy_input2) 69 | print(out) 70 | 71 | torch.onnx.export(model, (dummy_input1, dummy_input2), 'workspace/models/demo.onnx', verbose=True) -------------------------------------------------------------------------------- /scripts/getALL.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bash scripts/getCenterTrack.sh 3 | bash scripts/getDBFace.sh 4 | bash scripts/getDLADCN.sh -------------------------------------------------------------------------------- /scripts/getCenterTrack.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://zifuture.com:1000/fs/public_models/coco_tracking.onnx -O workspace/models/coco_tracking.onnx -------------------------------------------------------------------------------- /scripts/getDBFace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://zifuture.com:1000/fs/public_models/dbface.onnx -O workspace/models/dbface.onnx -------------------------------------------------------------------------------- /scripts/getDLADCN.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://zifuture.com:1000/fs/public_models/dladcnv2.onnx -O workspace/models/dladcnv2.onnx 3 | -------------------------------------------------------------------------------- /src/builder/trt_builder.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_BUILDER_HPP 4 | #define TRT_BUILDER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace TRTBuilder { 12 | 13 | typedef std::function Int8Process; 14 | 15 | void setDevice(int device_id); 16 | 17 | enum ModelSourceType { 18 | ModelSourceType_FromCaffe, 19 | ModelSourceType_FromONNX 20 | }; 21 | 22 | 
class ModelSource { 23 | public: 24 | ModelSource(const std::string& prototxt, const std::string& caffemodel); 25 | ModelSource(const std::string& onnxmodel); 26 | ModelSourceType type() const; 27 | std::string prototxt() const; 28 | std::string caffemodel() const; 29 | std::string onnxmodel() const; 30 | 31 | private: 32 | std::string prototxt_, caffemodel_; 33 | std::string onnxmodel_; 34 | ModelSourceType type_; 35 | }; 36 | 37 | class InputDims { 38 | public: 39 | InputDims(int channels, int height, int width); 40 | 41 | int channels() const; 42 | int height() const; 43 | int width() const; 44 | 45 | private: 46 | int channels_, height_, width_; 47 | }; 48 | 49 | enum TRTMode { 50 | TRTMode_FP32, 51 | TRTMode_FP16 52 | }; 53 | 54 | const char* modeString(TRTMode type); 55 | 56 | bool compileTRT( 57 | TRTMode mode, 58 | const std::vector& outputs, 59 | unsigned int batchSize, 60 | const ModelSource& source, 61 | const std::string& savepath, 62 | const std::vector inputsDimsSetup = {}); 63 | }; 64 | 65 | #endif //TRT_BUILDER_HPP -------------------------------------------------------------------------------- /src/caffeplugin/caffeplugin.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef PLUGIN_BASE_HPP 3 | #define PLUGIN_BASE_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace Plugin { 17 | 18 | enum Phase { 19 | CompilePhase, 20 | InferencePhase 21 | }; 22 | 23 | struct GTensor { 24 | GTensor() {} 25 | GTensor(const TRTInfer::Tensor& tensor); 26 | GTensor(float* ptr, int n, int c, int h, int w); 27 | GTensor(TRTInfer::halfloat* ptr, int n, int c, int h, int w); 28 | int count(int start_axis = 0) const; 29 | inline int offset(int n = 0, int c = 0, int h = 0, int w = 0) const { return ((n * this->channel_ + c) * this->height_ + h) * this->width_ + w; } 30 | 31 | template 32 | inline _T* ptr() const { return (_T*)ptr_; } 33 | 34 | template 35 | inline _T* ptr(int n, int c = 0, int h = 0, int w = 0) const { return (_T*)ptr_ + offset(n, c, h, w); } 36 | 37 | inline float* ptr_float() const { return (float*)ptr_; } 38 | inline float* ptr_float(int n, int c = 0, int h = 0, int w = 0) const { return (float*)ptr_ + offset(n, c, h, w); } 39 | inline TRTInfer::halfloat* ptr_half() const { return (TRTInfer::halfloat*)ptr_; } 40 | inline TRTInfer::halfloat* ptr_half(int n, int c = 0, int h = 0, int w = 0) const { return (TRTInfer::halfloat*)ptr_ + offset(n, c, h, w); } 41 | 42 | int num_ = 0, channel_ = 0, height_ = 0, width_ = 0; 43 | void* ptr_ = nullptr; 44 | TRTInfer::DataType dtType_ = TRTInfer::DataType::dtFloat; 45 | }; 46 | 47 | struct LayerConfig { 48 | 49 | /////////////////////////////////// 50 | int nbOutput_ = 1; 51 | size_t workspaceSize_ = 0; 52 | std::set supportDataType_; 53 | std::set supportPluginFormat_; 54 | 55 | std::vector> weights_; 56 | TRTInfer::DataType configDataType_; 57 | nvinfer1::PluginFormat configPluginFormat_; 58 | int configMaxbatchSize_ = 0; 59 | 60 | /////////////////////////////////// 61 | std::vector input; 62 | std::vector output; 63 | std::string serializeData_; 64 | 65 | LayerConfig(); 66 | void serialCopyTo(void* buffer); 67 | int serialize(); 68 | void deserialize(const void* ptr, size_t length); 69 | void loadWeights(const nvinfer1::Weights* weights, int nbWeights); 70 | virtual void seril(ccutil::BinIO& out) {} 71 | virtual void deseril(ccutil::BinIO& in) {} 72 | }; 73 | 74 | class TRTPlugin : 
public nvinfer1::IPluginExt { 75 | public: 76 | 77 | virtual ~TRTPlugin(); 78 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) = 0; 79 | virtual int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) = 0; 80 | 81 | void pluginInit(const std::string& name, const nvinfer1::Weights* weights, int nbWeights); 82 | void pluginInit(const std::string& name, const void* serialData, size_t serialLength); 83 | 84 | virtual std::shared_ptr config(const std::string& layerName); 85 | virtual bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const; 86 | virtual void configureWithFormat( 87 | const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, 88 | int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize); 89 | virtual int getNbOutputs() const; 90 | virtual nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims); 91 | virtual int initialize(); 92 | virtual void terminate(); 93 | virtual size_t getWorkspaceSize(int maxBatchSize) const override; 94 | virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream); 95 | virtual size_t getSerializationSize(); 96 | virtual void serialize(void* buffer); 97 | 98 | private: 99 | void mappingToGTensor(); 100 | 101 | protected: 102 | std::string layerName_; 103 | Phase phase_ = CompilePhase; 104 | std::shared_ptr config_; 105 | std::vector inputTensors_; 106 | std::vector outputTensors_; 107 | std::vector weightTensors_; 108 | }; 109 | 110 | #define SETUP_PLUGIN(class_, pattern_) \ 111 | static std::string pattern() { \ 112 | return pattern_; \ 113 | } \ 114 | \ 115 | static std::shared_ptr creator() { \ 116 | return std::shared_ptr(new class_()); \ 117 | } 118 | 119 | #define RegisterPlugin(class_) \ 120 | static Plugin::PluginRegister __register##class_(class_::creator, class_::pattern()) 121 | 122 | typedef std::shared_ptr(*PluginCreater)(); 123 | 124 | struct PluginInfo { 125 | PluginCreater creater; 126 | std::string pattern; 127 | }; 128 | 129 | class PluginRegistry { 130 | public: 131 | virtual void addPlugin(PluginCreater creater, const std::string& pattern) = 0; 132 | virtual PluginInfo* findPlugin(const std::string& layerName) = 0; 133 | }; 134 | 135 | class PluginRegister { 136 | public: 137 | PluginRegister(PluginCreater creater, const std::string& pattern); 138 | }; 139 | 140 | class TRTBuilderPluginFactory : public nvcaffeparser1::IPluginFactoryExt, public nvinfer1::IPluginFactory { 141 | 142 | public: 143 | virtual bool isPluginExt(const char* layerName) override; 144 | virtual bool isPlugin(const char* layerName) override; 145 | virtual nvinfer1::IPluginExt* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override; 146 | virtual nvinfer1::IPluginExt* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override; 147 | 148 | virtual bool support(const std::string& layerName); 149 | 150 | virtual std::shared_ptr createPlugin(const std::string& layerName); 151 | virtual nvinfer1::IPluginExt* builderCreate(const std::string& layerName, const nvinfer1::Weights* weights, int nbWeights); 152 | virtual nvinfer1::IPluginExt* inferCreate(const std::string& layerName, const void* serialData, size_t serialLength); 153 | 154 | private: 155 | std::vector> plugins_; 156 | }; 157 | 158 | 
/////////////////////////////////////////////////////////////////////////////////////////// 159 | std::shared_ptr createPluginFactoryForBuildPhase(); 160 | std::shared_ptr createPluginFactoryForInferPhase(); 161 | PluginRegistry* getPluginRegistry(); 162 | 163 | #define ExecuteKernel(numJobs, kernel, stream) kernel<<>> 164 | }; //namespace Plugin 165 | 166 | #endif //PLUGIN_BASE_HPP -------------------------------------------------------------------------------- /src/caffeplugin/plugins/ChannelMultiplicationLayer.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "ChannelMultiplicationLayer.hpp" 4 | #include 5 | 6 | using namespace Plugin; 7 | 8 | template 9 | __global__ void channelMultiplicationKernel(const _T* in, const _T* muld, _T* out, int input_area, int edge) 10 | { 11 | KERNEL_POSITION; 12 | 13 | int c = position / input_area; 14 | out[position] = in[position] * muld[c]; 15 | } 16 | 17 | namespace Plugin { 18 | 19 | std::shared_ptr ChannelMultiplicationLayer::config(const std::string& layerName) { 20 | auto cfg = TRTPlugin::config(layerName); 21 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 22 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 23 | return cfg; 24 | } 25 | 26 | int ChannelMultiplicationLayer::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 27 | auto& data = inputs[0]; 28 | auto& mul = inputs[1]; 29 | auto& out = outputs[0]; 30 | int edge = data.count(); 31 | 32 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 33 | channelMultiplicationKernel <<>> (data.ptr_float(), mul.ptr_float(), out.ptr_float(), data.height_ * data.width_, edge); 34 | } 35 | else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 36 | channelMultiplicationKernel <<>> ( 37 | (const TRTInfer::halfloat*)data.ptr_half(), (const TRTInfer::halfloat*)mul.ptr_half(), out.ptr_half(), data.height_ * data.width_, edge 38 | ); 39 | } 40 | return 0; 41 | } 42 | 43 | nvinfer1::Dims ChannelMultiplicationLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 44 | return inputDims[0]; 45 | } 46 | } 47 | 48 | RegisterPlugin(ChannelMultiplicationLayer); -------------------------------------------------------------------------------- /src/caffeplugin/plugins/ChannelMultiplicationLayer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/ChannelMultiplicationLayer.hpp -------------------------------------------------------------------------------- /src/caffeplugin/plugins/ClipLayer.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "ClipLayer.hpp" 4 | #include 5 | 6 | using namespace Plugin; 7 | 8 | template 9 | __global__ void clipKernel(const _T* in, _T* out, int inw, int inh, int outw, int outh, int edge) 10 | { 11 | KERNEL_POSITION; 12 | 13 | int outHeightIndex = position; 14 | int selectOutInnerY = outHeightIndex % outh; 15 | int nc = outHeightIndex / outh; 16 | int inHeightIndex = nc * inh + selectOutInnerY; 17 | in += inHeightIndex * inw; 18 | out += outHeightIndex * outw; 19 | for (int i = 0; i < outw; ++i) 20 | *out++ = *in++; 21 | } 22 | 23 | namespace Plugin { 24 | 25 | std::shared_ptr ClipLayer::config(const std::string& layerName) { 26 | auto cfg = 
TRTPlugin::config(layerName); 27 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 28 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 29 | return cfg; 30 | } 31 | 32 | int ClipLayer::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 33 | auto& data = inputs[0]; 34 | auto& out = outputs[0]; 35 | 36 | int edge = out.num_ * out.channel_ * out.height_; 37 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 38 | clipKernel <<>> (data.ptr_float(), out.ptr_float(), data.width_, data.height_, out.width_, out.height_, edge); 39 | } 40 | else if(config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 41 | clipKernel <<>> (data.ptr_half(), out.ptr_half(), data.width_, data.height_, out.width_, out.height_, edge); 42 | } 43 | return 0; 44 | } 45 | 46 | nvinfer1::Dims ClipLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 47 | return nvinfer1::Dims3(inputDims[0].d[0], inputDims[0].d[1] - 1, inputDims[0].d[2] - 1); 48 | } 49 | } 50 | 51 | RegisterPlugin(ClipLayer); -------------------------------------------------------------------------------- /src/caffeplugin/plugins/ClipLayer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/ClipLayer.hpp -------------------------------------------------------------------------------- /src/caffeplugin/plugins/DCNLayer.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DCNLayer_HPP 3 | #define DCNLayer_HPP 4 | 5 | #include 6 | #include 7 | 8 | namespace Plugin { 9 | class DCNLayer : public TRTPlugin { 10 | public: 11 | SETUP_PLUGIN(DCNLayer, "DCN*"); 12 | 13 | DCNLayer(); 14 | virtual ~DCNLayer(); 15 | 16 | virtual std::shared_ptr config(const std::string& layerName) override; 17 | 18 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims); 19 | virtual size_t getWorkspaceSize(int maxBatchSize) const override; 20 | virtual int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 21 | private: 22 | cublasHandle_t cublasHandle_ = nullptr; 23 | }; 24 | } 25 | 26 | #endif //DCNLayer_HPP -------------------------------------------------------------------------------- /src/caffeplugin/plugins/PlexShuffleLayer.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "PlexShuffleLayer.hpp" 4 | #include 5 | 6 | using namespace Plugin; 7 | 8 | template 9 | __global__ void pixelShuffleKernel( 10 | const _T* bottom_data, _T* top_data, int input_c, int input_h, 11 | int input_w, int input_area, int output_h, int output_w, int output_area, int edge) 12 | { 13 | KERNEL_POSITION; 14 | 15 | int input_c_index = position / input_area; 16 | int f_input_index = position % input_area; 17 | int input_row = f_input_index / input_w; 18 | int input_col = f_input_index % input_w; 19 | int output_c_index = input_c_index / 4; 20 | input_c_index = input_c_index % 4; 21 | int output_row = input_row * 2 + input_c_index / 2; 22 | int output_col = input_col * 2 + input_c_index % 2; 23 | int output_index = output_c_index * output_area + output_row * output_w + output_col; 24 | top_data[output_index] = bottom_data[position]; 25 | } 26 | 27 | namespace Plugin { 28 | 
29 | std::shared_ptr PlexShuffleLayer::config(const std::string& layerName){ 30 | auto cfg = TRTPlugin::config(layerName); 31 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 32 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 33 | return cfg; 34 | } 35 | 36 | int PlexShuffleLayer::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 37 | auto& data = inputs[0]; 38 | auto& out = outputs[0]; 39 | 40 | int edge = data.count(); 41 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 42 | pixelShuffleKernel <<>> (data.ptr_float(), out.ptr_float(), data.channel_, data.height_, data.width_, 43 | data.height_ * data.width_, out.height_, out.width_, out.height_ * out.width_, edge); 44 | } 45 | else { 46 | pixelShuffleKernel <<>> (data.ptr_half(), out.ptr_half(), data.channel_, data.height_, data.width_, 47 | data.height_ * data.width_, out.height_, out.width_, out.height_ * out.width_, edge); 48 | } 49 | return 0; 50 | } 51 | 52 | nvinfer1::Dims PlexShuffleLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 53 | return nvinfer1::Dims3(inputDims[0].d[0] / 4, inputDims[0].d[1] * 2, inputDims[0].d[2] * 2); 54 | } 55 | } 56 | 57 | RegisterPlugin(PlexShuffleLayer); -------------------------------------------------------------------------------- /src/caffeplugin/plugins/PlexShuffleLayer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/PlexShuffleLayer.hpp -------------------------------------------------------------------------------- /src/caffeplugin/plugins/TestPlugin.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "TestPlugin.hpp" 4 | 5 | typedef TRTInfer::halfloat halfloat; 6 | 7 | template 8 | __global__ void MyPluginKenel(_T* input, _T* output, int edge); 9 | 10 | template<> 11 | __global__ void MyPluginKenel(float* input, float* output, int edge) { 12 | 13 | KERNEL_POSITION; 14 | output[position] = (input[position] < 0 ? 0 : input[position]) + 1.3f; 15 | } 16 | 17 | template<> 18 | __global__ void MyPluginKenel(halfloat* input, halfloat* output, int edge) { 19 | 20 | KERNEL_POSITION; 21 | 22 | halfloat zero = 0.0f; 23 | halfloat add = 1.3f; 24 | output[position] = (input[position] < zero ? 
zero : input[position]) + add; 25 | } 26 | 27 | nvinfer1::Dims TestPlugin::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 28 | return inputDims[0]; 29 | } 30 | 31 | std::shared_ptr TestPlugin::config(const std::string& layerName) { 32 | auto cfg = TRTPlugin::config(layerName); 33 | 34 | //declare that this plugin supports both the half and float formats 35 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 36 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 37 | return cfg; 38 | } 39 | 40 | int TestPlugin::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 41 | 42 | int count = inputs[0].count(); 43 | auto grid = gridDims(count); 44 | auto block = blockDims(count); 45 | 46 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 47 | MyPluginKenel<float> <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count); 48 | } 49 | else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 50 | MyPluginKenel<halfloat> <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count); 51 | } 52 | return 0; 53 | } 54 | 55 | RegisterPlugin(TestPlugin); -------------------------------------------------------------------------------- /src/caffeplugin/plugins/TestPlugin.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TestPlugin_HPP 3 | #define TestPlugin_HPP 4 | 5 | #include 6 | 7 | using namespace Plugin; 8 | 9 | class TestPlugin : public TRTPlugin { 10 | public: 11 | //register the plugin through this macro: it generates the plugin creation function and the name pattern used to match each layer's name 12 | //the matching is done with ccutil::patternMatch, refer to that function 13 | SETUP_PLUGIN(TestPlugin, "TestPlugin*"); 14 | 15 | virtual std::shared_ptr config(const std::string& layerName) override; 16 | 17 | //this plugin has a single output whose shape equals that of input 0, so return input0's shape 18 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override; 19 | 20 | //the forward (execution) step 21 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 22 | }; 23 | 24 | #endif //TestPlugin_HPP -------------------------------------------------------------------------------- /src/common/trt_common.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "trt_common.hpp" 4 | #include 5 | 6 | dim3 gridDims(int numJobs) { 7 | int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS; 8 | return dim3(ceil(numJobs / (float)numBlockThreads)); 9 | } 10 | 11 | dim3 blockDims(int numJobs) { 12 | return numJobs < GPU_BLOCK_THREADS ?
numJobs : GPU_BLOCK_THREADS; 13 | } -------------------------------------------------------------------------------- /src/common/trt_common.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TRT_COMMON_HPP 3 | #define TRT_COMMON_HPP 4 | 5 | #include 6 | 7 | #define GPU_BLOCK_THREADS 512 8 | #define KERNEL_POSITION \ 9 | int position = (blockDim.x * blockIdx.x + threadIdx.x); \ 10 | if (position >= (edge)) return; 11 | 12 | dim3 gridDims(int numJobs); 13 | dim3 blockDims(int numJobs); 14 | 15 | #endif //TRT_COMMON_HPP -------------------------------------------------------------------------------- /src/examples/center_net_coco2x_dcn.cpp: -------------------------------------------------------------------------------- 1 |  2 | #include 3 | #include 4 | #include "builder/trt_builder.hpp" 5 | #include "infer/trt_infer.hpp" 6 | #include "infer/ct_detect_backend.hpp" 7 | 8 | using namespace cv; 9 | using namespace std; 10 | 11 | namespace examples { 12 | 13 | static Rect restoreCenterNetBox(float dx, float dy, float dw, float dh, float cellx, float celly, int stride, Size netSize, Size imageSize) { 14 | 15 | float scale = 0; 16 | if (imageSize.width >= imageSize.height) 17 | scale = netSize.width / (float)imageSize.width; 18 | else 19 | scale = netSize.height / (float)imageSize.height; 20 | 21 | float x = ((cellx + dx - dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 22 | float y = ((celly + dy - dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 23 | float r = ((cellx + dx + dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 24 | float b = ((celly + dy + dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 25 | return Rect(Point(x, y), Point(r + 1, b + 1)); 26 | } 27 | 28 | static void preprocessCenterNetImageToTensor(const Mat& image, int numIndex, const shared_ptr& tensor) { 29 | 30 | int outH = tensor->height(); 31 | int outW = tensor->width(); 32 | float sw = outW / (float)image.cols; 33 | float sh = outH / (float)image.rows; 34 | float scale = std::min(sw, sh); 35 | 36 | Mat matrix = getRotationMatrix2D(Point2f(image.cols*0.5, image.rows*0.5), 0, scale); 37 | matrix.at(0, 2) -= image.cols*0.5 - outW * 0.5; 38 | matrix.at(1, 2) -= image.rows*0.5 - outH * 0.5; 39 | 40 | float mean[3] = {0.40789654, 0.44719302, 0.47026115}; 41 | float std[3] = {0.28863828, 0.27408164, 0.27809835}; 42 | 43 | Mat outimage; 44 | cv::warpAffine(image, outimage, matrix, Size(outW, outH)); 45 | tensor->setNormMatGPU(numIndex, outimage, mean, std); 46 | } 47 | 48 | static vector detectBoundingbox(const shared_ptr& boundingboxDetect_, const Mat& image, float threshold = 0.3) { 49 | 50 | if (boundingboxDetect_ == nullptr) { 51 | INFO("detectBoundingbox failure call, model is nullptr"); 52 | return vector(); 53 | } 54 | 55 | preprocessCenterNetImageToTensor(image, 0, boundingboxDetect_->input()); 56 | boundingboxDetect_->forward(); 57 | 58 | auto outHM = boundingboxDetect_->tensor("hm"); 59 | auto outHMPool = boundingboxDetect_->tensor("hm_pool"); 60 | auto outWH = boundingboxDetect_->tensor("wh"); 61 | auto outXY = boundingboxDetect_->tensor("reg"); 62 | const int stride = 4; 63 | 64 | vector bboxs; 65 | Size inputSize = boundingboxDetect_->input()->size(); 66 | float sx = image.cols / (float)inputSize.width * stride; 67 | float sy = image.rows / (float)inputSize.height * stride; 68 | 69 | for (int class_ = 0; class_ < outHM->channel(); ++class_) { 70 | for (int i = 0; 
i < outHM->height(); ++i) { 71 | float* ohmptr = outHM->cpu(0, class_, i); 72 | float* ohmpoolptr = outHMPool->cpu(0, class_, i); 73 | for (int j = 0; j < outHM->width(); ++j) { 74 | if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) { 75 | 76 | float dx = outXY->at(0, 0, i, j); 77 | float dy = outXY->at(0, 1, i, j); 78 | float dw = outWH->at(0, 0, i, j); 79 | float dh = outWH->at(0, 1, i, j); 80 | ccutil::BBox box = restoreCenterNetBox(dx, dy, dw, dh, j, i, stride, inputSize, image.size()); 81 | box = box.box() & Rect(0, 0, image.cols, image.rows); 82 | box.label = class_; 83 | box.score = *ohmptr; 84 | 85 | if (box.area() > 0) 86 | bboxs.push_back(box); 87 | } 88 | ++ohmptr; 89 | ++ohmpoolptr; 90 | } 91 | } 92 | } 93 | return bboxs; 94 | } 95 | 96 | static vector> detectBoundingboxOptim(const shared_ptr& boundingboxDetect_, const vector& images, 97 | float threshold, int maxobjs, TRTInfer::CTDetectBackend* detectBackend) { 98 | 99 | if (boundingboxDetect_ == nullptr) { 100 | INFO("detectBoundingbox failure call, model is nullptr"); 101 | return vector>(); 102 | } 103 | boundingboxDetect_->input()->resize(images.size()); 104 | 105 | vector imsize; 106 | for (int i = 0; i < images.size(); ++i) { 107 | preprocessCenterNetImageToTensor(images[i], i, boundingboxDetect_->input()); //1.0 ms 108 | imsize.emplace_back(images[i].size()); 109 | } 110 | 111 | boundingboxDetect_->forward(false); //41.5 ms 112 | auto outHM = boundingboxDetect_->tensor("hm"); 113 | auto outHMPool = boundingboxDetect_->tensor("hm_pool"); 114 | auto outWH = boundingboxDetect_->tensor("wh"); 115 | auto outXY = boundingboxDetect_->tensor("reg"); 116 | return detectBackend->forwardGPU(outHM, outHMPool, outWH, outXY, imsize, threshold, maxobjs); // 0.25 ms 117 | } 118 | 119 | void center_net_coco2x_dcn() { 120 | 121 | INFOW("onnx to trtmodel..."); 122 | 123 | // tensorRT 7.0 + OnnX: Must be an explicit batchsize, that is, batchsize must be specified at compile time. 
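	// The compiled engine below is tied to this batch size (the file name encodes it), so changing batchSize triggers a rebuild of the .trtmodel on the next run.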
124 | int batchSize = 2; 125 | auto modelFile = ccutil::format("models/dladcnv2.fp32.b%d.trtmodel", batchSize); 126 | if (!ccutil::exists(modelFile)) { 127 | 128 | if (!ccutil::exists("models/dladcnv2.onnx")) { 129 | INFOW( 130 | "models/dladcnv2.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dladcnv2.onnx " 131 | "or use centerNetDLADCNOnnX/dladcn_export_onnx.py to generate" 132 | ); 133 | return; 134 | } 135 | 136 | TRTBuilder::compileTRT( 137 | TRTBuilder::TRTMode_FP32, {}, batchSize, 138 | TRTBuilder::ModelSource("models/dladcnv2.onnx"), 139 | modelFile, {TRTBuilder::InputDims(3, 512, 512)} 140 | ); 141 | } 142 | 143 | INFO("load model: %s", modelFile.c_str()); 144 | auto engine = TRTInfer::loadEngine(modelFile); 145 | if (!engine) { 146 | INFO("can not load model."); 147 | return; 148 | } 149 | 150 | INFO("forward..."); 151 | vector images{ 152 | imread("imgs/www.jpg"), 153 | imread("imgs/17790319373_bd19b24cfc_k.jpg") 154 | }; 155 | 156 | TRTInfer::CTDetectBackend backend(engine->getCUStream()); 157 | auto imobjs = detectBoundingboxOptim(engine, images, 0.3, 100, &backend); // 43.86 ms 158 | 159 | for (int j = 0; j < images.size(); ++j) { 160 | auto& objs = imobjs[j]; 161 | objs = ccutil::nms(objs, 0.5); 162 | 163 | INFO("objs.length = %d", objs.size()); 164 | for (int i = 0; i < objs.size(); ++i) { 165 | auto& obj = objs[i]; 166 | ccutil::drawbbox(images[j], obj); 167 | } 168 | imwrite(ccutil::format("results/%d.centernet.coco2x.dcn.jpg", j), images[j]); 169 | } 170 | 171 | #ifdef _WIN32 172 | cv::imshow("dla dcn detect 1", images[0]); 173 | cv::imshow("dla dcn detect 2", images[1]); 174 | cv::waitKey(); 175 | cv::destroyAllWindows(); 176 | #endif 177 | INFO("done."); 178 | } 179 | }; -------------------------------------------------------------------------------- /src/examples/center_track_coco_tracking.cpp: -------------------------------------------------------------------------------- 1 |  2 | #include 3 | #include 4 | #include "builder/trt_builder.hpp" 5 | #include "infer/trt_infer.hpp" 6 | 7 | using namespace cv; 8 | using namespace std; 9 | 10 | namespace examples { 11 | 12 | static void drawArrow(cv::Mat& img, cv::Point pStart, cv::Point pEnd, int len, int alpha, 13 | cv::Scalar color, int thickness, int lineType) 14 | { 15 | const double PI = 3.1415926; 16 | Point arrow; 17 | double angle = atan2((double)(pStart.y - pEnd.y), (double)(pStart.x - pEnd.x)); 18 | line(img, pStart, pEnd, color, thickness, lineType); 19 | 20 | arrow.x = pEnd.x + len * cos(angle + PI * alpha / 180); 21 | arrow.y = pEnd.y + len * sin(angle + PI * alpha / 180); 22 | line(img, pEnd, arrow, color, thickness, lineType); 23 | arrow.x = pEnd.x + len * cos(angle - PI * alpha / 180); 24 | arrow.y = pEnd.y + len * sin(angle - PI * alpha / 180); 25 | line(img, pEnd, arrow, color, thickness, lineType); 26 | } 27 | 28 | static Rect restoreCenterNetBox(float dx, float dy, float dw, float dh, float cellx, float celly, int stride, Size netSize, Size imageSize) { 29 | 30 | float scale = 0; 31 | if (imageSize.width >= imageSize.height) 32 | scale = netSize.width / (float)imageSize.width; 33 | else 34 | scale = netSize.height / (float)imageSize.height; 35 | 36 | float x = ((cellx + dx - dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 37 | float y = ((celly + dy - dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 38 | float r = ((cellx + dx + dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 39 | float b = ((celly 
+ dy + dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 40 | return Rect(Point(x, y), Point(r + 1, b + 1)); 41 | } 42 | 43 | static Scalar restoreCenterTracking(float ox, float oy, float cellx, float celly, int stride, Size netSize, Size imageSize) { 44 | 45 | float scale = 0; 46 | if (imageSize.width >= imageSize.height) 47 | scale = netSize.width / (float)imageSize.width; 48 | else 49 | scale = netSize.height / (float)imageSize.height; 50 | 51 | float x = ((cellx + ox) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 52 | float y = ((celly + oy) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 53 | float x0 = ((cellx)* stride - netSize.width * 0.5) / scale + imageSize.width * 0.5; 54 | float y0 = ((celly)* stride - netSize.height * 0.5) / scale + imageSize.height * 0.5; 55 | return Scalar(x0, y0, x, y); 56 | } 57 | 58 | static void preprocessCenterNetImageToTensor(const Mat& image, int numIndex, const shared_ptr& tensor) { 59 | 60 | int outH = tensor->height(); 61 | int outW = tensor->width(); 62 | float sw = outW / (float)image.cols; 63 | float sh = outH / (float)image.rows; 64 | float scale = std::min(sw, sh); 65 | 66 | Mat matrix = getRotationMatrix2D(Point2f(image.cols*0.5, image.rows*0.5), 0, scale); 67 | matrix.at(0, 2) -= image.cols*0.5 - outW * 0.5; 68 | matrix.at(1, 2) -= image.rows*0.5 - outH * 0.5; 69 | 70 | float mean[3] = {0.40789654, 0.44719302, 0.47026115}; 71 | float std[3] = {0.28863828, 0.27408164, 0.27809835}; 72 | 73 | Mat outimage; 74 | cv::warpAffine(image, outimage, matrix, Size(outW, outH)); 75 | tensor->setNormMatGPU(numIndex, outimage, mean, std); 76 | } 77 | 78 | static vector> detectBoundingboxAndTracking(const shared_ptr& boundingboxAndTrackingDetect_, const Mat& image, const Mat& prevImage, float threshold = 0.3) { 79 | 80 | if (boundingboxAndTrackingDetect_ == nullptr) { 81 | INFO("detectBoundingbox failure call, model is nullptr"); 82 | return vector>(); 83 | } 84 | 85 | preprocessCenterNetImageToTensor(image, 0, boundingboxAndTrackingDetect_->input(0)); 86 | preprocessCenterNetImageToTensor(prevImage, 0, boundingboxAndTrackingDetect_->input(1)); 87 | boundingboxAndTrackingDetect_->forward(); 88 | auto outHM = boundingboxAndTrackingDetect_->tensor("hm"); 89 | auto outHMPool = boundingboxAndTrackingDetect_->tensor("hm_pool"); 90 | auto outWH = boundingboxAndTrackingDetect_->tensor("wh"); 91 | auto outXY = boundingboxAndTrackingDetect_->tensor("reg"); 92 | auto outTracking = boundingboxAndTrackingDetect_->tensor("tracking"); 93 | const int stride = 4; 94 | 95 | vector> bboxs; 96 | Size inputSize = boundingboxAndTrackingDetect_->input()->size(); 97 | float sx = image.cols / (float)inputSize.width * stride; 98 | float sy = image.rows / (float)inputSize.height * stride; 99 | 100 | for (int class_ = 0; class_ < outHM->channel(); ++class_) { 101 | for (int i = 0; i < outHM->height(); ++i) { 102 | float* ohmptr = outHM->cpu(0, class_, i); 103 | float* ohmpoolptr = outHMPool->cpu(0, class_, i); 104 | for (int j = 0; j < outHM->width(); ++j) { 105 | if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) { 106 | 107 | float dx = outXY->at(0, 0, i, j); 108 | float dy = outXY->at(0, 1, i, j); 109 | float dw = outWH->at(0, 0, i, j); 110 | float dh = outWH->at(0, 1, i, j); 111 | float ox = outTracking->at(0, 0, i, j); 112 | float oy = outTracking->at(0, 1, i, j); 113 | ccutil::BBox box = restoreCenterNetBox(dx, dy, dw, dh, j, i, stride, inputSize, image.size()); 114 | auto offset = restoreCenterTracking(ox, 
oy, j, i, stride, inputSize, image.size()); 115 | box = box.box() & Rect(0, 0, image.cols, image.rows); 116 | box.label = class_; 117 | box.score = *ohmptr; 118 | 119 | if (box.area() > 0) 120 | bboxs.push_back(make_tuple(box, offset)); 121 | } 122 | ++ohmptr; 123 | ++ohmpoolptr; 124 | } 125 | } 126 | } 127 | return bboxs; 128 | } 129 | 130 | void center_track_coco_tracking() { 131 | INFOW("onnx to trtmodel..."); 132 | 133 | if (!ccutil::exists("models/coco_tracking.fp32.trtmodel")) { 134 | 135 | if (!ccutil::exists("models/coco_tracking.onnx")) { 136 | 137 | INFOW( 138 | "models/coco_tracking.onnx not found, download url: http://zifuture.com:1000/fs/public_models/coco_tracking.onnx" 139 | ); 140 | return; 141 | } 142 | 143 | TRTBuilder::compileTRT( 144 | TRTBuilder::TRTMode_FP32, {}, 1, 145 | TRTBuilder::ModelSource("models/coco_tracking.onnx"), 146 | "models/coco_tracking.fp32.trtmodel", 147 | {TRTBuilder::InputDims(3, 512, 512), TRTBuilder::InputDims(3, 512, 512)} 148 | ); 149 | } 150 | 151 | INFO("load model: models/coco_tracking.fp32.trtmodel"); 152 | auto engine = TRTInfer::loadEngine("models/coco_tracking.fp32.trtmodel"); 153 | if (!engine) { 154 | INFO("can not load model."); 155 | return; 156 | } 157 | 158 | INFO("forward..."); 159 | Mat prevImage = imread("imgs/000020.jpg"); 160 | Mat image = imread("imgs/000023.jpg"); 161 | 162 | auto objs = detectBoundingboxAndTracking(engine, image, prevImage, 0.35); 163 | 164 | INFO("objs.length = %d", objs.size()); 165 | for (int i = 0; i < objs.size(); ++i) { 166 | auto& obj = objs[i]; 167 | auto& box = get<0>(obj); 168 | auto& offset = get<1>(obj); 169 | ccutil::drawbbox(image, box, ccutil::DrawType::NoName); 170 | drawArrow(image, Point(offset[0], offset[1]), Point(offset[2], offset[3]), 10, 35, Scalar(0, 255, 0), 2, 16); 171 | 172 | ccutil::drawbbox(prevImage, box, ccutil::DrawType::NoName); 173 | drawArrow(prevImage, Point(offset[0], offset[1]), Point(offset[2], offset[3]), 10, 35, Scalar(0, 255, 0), 2, 16); 174 | } 175 | 176 | imwrite("results/coco.tracking.jpg", image); 177 | 178 | #ifdef _WIN32 179 | cv::imshow("coco.tracking.current", image); 180 | cv::imshow("coco.tracking.prev", prevImage); 181 | cv::waitKey(); 182 | cv::destroyAllWindows(); 183 | #endif 184 | INFO("done."); 185 | } 186 | }; -------------------------------------------------------------------------------- /src/examples/dbface.cpp: -------------------------------------------------------------------------------- 1 |  2 | #include 3 | #include 4 | #include "builder/trt_builder.hpp" 5 | #include "infer/trt_infer.hpp" 6 | #include "infer/dbface_backend.hpp" 7 | 8 | using namespace cv; 9 | using namespace std; 10 | 11 | namespace examples { 12 | 13 | static float commonExp(float value) { 14 | 15 | float gate = 1; 16 | float base = exp(gate); 17 | if (fabs(value) < gate) 18 | return value * base; 19 | 20 | if (value > 0) { 21 | return exp(value); 22 | } 23 | else { 24 | return -exp(-value); 25 | } 26 | } 27 | 28 | static vector detectDBFace(const shared_ptr& dbfaceDetect_, const Mat& image, float threshold = 0.3) { 29 | 30 | Assert(image.cols % 32 == 0 || image.rows % 32 == 0); 31 | 32 | float mean[3] = {0.408, 0.447, 0.47}; 33 | float std[3] = {0.289, 0.274, 0.278}; 34 | 35 | //dbfaceDetect_->input()->setNormMat(0, image, mean, std); // 20 ms 36 | dbfaceDetect_->input()->setNormMatGPU(0, image, mean, std); // 5 ms 37 | dbfaceDetect_->forward(); 38 | auto outHM = dbfaceDetect_->tensor("hm"); 39 | auto outHMPool = dbfaceDetect_->tensor("pool_hm"); 40 | auto outTLRB = 
dbfaceDetect_->tensor("tlrb"); 41 | auto outLandmark = dbfaceDetect_->tensor("landmark"); 42 | const int stride = 4; 43 | 44 | vector bboxs; 45 | Size inputSize = dbfaceDetect_->input()->size(); 46 | float sx = image.cols / (float)inputSize.width * stride; 47 | float sy = image.rows / (float)inputSize.height * stride; 48 | 49 | for (int class_ = 0; class_ < outHM->channel(); ++class_) { 50 | for (int i = 0; i < outHM->height(); ++i) { 51 | float* ohmptr = outHM->cpu(0, class_, i); 52 | float* ohmpoolptr = outHMPool->cpu(0, class_, i); 53 | for (int j = 0; j < outHM->width(); ++j) { 54 | if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) { 55 | 56 | float dx = outTLRB->at(0, 0, i, j); 57 | float dy = outTLRB->at(0, 1, i, j); 58 | float dr = outTLRB->at(0, 2, i, j); 59 | float db = outTLRB->at(0, 3, i, j); 60 | float cx = j; 61 | float cy = i; 62 | float x = (cx - dx) * stride; 63 | float y = (cy - dy) * stride; 64 | float r = (cx + dr) * stride; 65 | float b = (cy + db) * stride; 66 | 67 | TRTInfer::FaceBox box(ccutil::BBox(x, y, r, b, *ohmptr, class_)); 68 | if (box.area() > 0) { 69 | 70 | for (int k = 0; k < 5; ++k) { 71 | float landmark_x = outLandmark->at(0, k, i, j) * 4; 72 | float landmark_y = outLandmark->at(0, k + 5, i, j) * 4; 73 | landmark_x = (commonExp(landmark_x) + cx) * stride; 74 | landmark_y = (commonExp(landmark_y) + cy) * stride; 75 | box.landmark[k] = Point2f(landmark_x, landmark_y); 76 | } 77 | bboxs.push_back(box); 78 | } 79 | } 80 | ++ohmptr; 81 | ++ohmpoolptr; 82 | } 83 | } 84 | } 85 | return bboxs; 86 | } 87 | 88 | static vector detectDBFaceOptim(const shared_ptr& dbfaceDetect_, const Mat& image, float threshold, TRTInfer::DBFaceBackend* backend) { 89 | 90 | Assert(image.cols % 32 == 0 || image.rows % 32 == 0); 91 | 92 | float mean[3] = {0.408, 0.447, 0.47}; 93 | float std[3] = {0.289, 0.274, 0.278}; 94 | 95 | dbfaceDetect_->input()->setNormMatGPU(0, image, mean, std); // 5 ms 96 | dbfaceDetect_->forward(false); 97 | auto outHM = dbfaceDetect_->tensor("hm"); 98 | auto outHMPool = dbfaceDetect_->tensor("pool_hm"); 99 | auto outTLRB = dbfaceDetect_->tensor("tlrb"); 100 | auto outLandmark = dbfaceDetect_->tensor("landmark"); 101 | const int stride = 4; 102 | return backend->forwardGPU(outHM, outHMPool, outTLRB, outLandmark, threshold, 1000)[0]; // 0.25 ms 103 | } 104 | 105 | static Mat padImage(const Mat& image, int stride = 32) { 106 | 107 | int w = image.cols; 108 | if (image.cols % stride != 0) 109 | w = image.cols + (stride - (image.cols % stride)); 110 | 111 | int h = image.rows; 112 | if (image.rows % stride != 0) 113 | h = image.rows + (stride - (image.rows % stride)); 114 | 115 | if (Size(w, h) == image.size()) 116 | return image; 117 | 118 | Mat output(h, w, image.type(), Scalar(0)); 119 | image.copyTo(output(Rect(0, 0, image.cols, image.rows))); 120 | return output; 121 | } 122 | 123 | void dbface() { 124 | 125 | Mat image = imread("imgs/selfie.jpg"); 126 | if (image.empty()) { 127 | INFOW("image load fail"); 128 | return; 129 | } 130 | 131 | Mat padimage = padImage(image); 132 | int maxBatchSize = 1; 133 | string modelPath = ccutil::format("models/dbface.%dx%d.fp32.b%d.trtmodel", padimage.cols, padimage.rows, maxBatchSize); 134 | 135 | if (!ccutil::exists(modelPath)) { 136 | 137 | if (!ccutil::exists("models/dbface.onnx")) { 138 | INFOW( 139 | "models/dbface.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dbface.onnx" 140 | ); 141 | return; 142 | } 143 | 144 | TRTBuilder::compileTRT( 145 | TRTBuilder::TRTMode_FP32, {}, 
maxBatchSize, 146 | TRTBuilder::ModelSource("models/dbface.onnx"), 147 | modelPath, 148 | {TRTBuilder::InputDims(3, padimage.rows, padimage.cols)} 149 | ); 150 | } 151 | 152 | INFO("load model: %s", modelPath.c_str()); 153 | auto engine = TRTInfer::loadEngine(modelPath); 154 | if (!engine) { 155 | INFO("can not load model: %s", modelPath.c_str()); 156 | return; 157 | } 158 | 159 | INFO("forward..."); 160 | TRTInfer::DBFaceBackend backend(engine->getCUStream()); 161 | auto objs = detectDBFaceOptim(engine, image, 0.25, &backend); 162 | 163 | INFO("objs.length = %d", objs.size()); 164 | for (int i = 0; i < objs.size(); ++i) { 165 | auto& obj = objs[i]; 166 | ccutil::drawbbox(image, obj, ccutil::DrawType::Empty); 167 | 168 | for (int k = 0; k < 5; ++k) { 169 | cv::circle(image, obj.landmark[k], 3, Scalar(0, 0, 255), -1, 16); 170 | } 171 | } 172 | 173 | imwrite("results/selfie.draw.jpg", image); 174 | 175 | #ifdef _WIN32 176 | cv::imshow("dbface selfie detect", image); 177 | cv::waitKey(); 178 | cv::destroyAllWindows(); 179 | #endif 180 | INFO("done."); 181 | } 182 | }; -------------------------------------------------------------------------------- /src/examples/onnx.cpp: -------------------------------------------------------------------------------- 1 |  2 | #include 3 | #include 4 | #include "builder/trt_builder.hpp" 5 | #include "infer/trt_infer.hpp" 6 | 7 | using namespace cv; 8 | using namespace std; 9 | 10 | namespace examples { 11 | 12 | void onnx() { 13 | 14 | if (!ccutil::exists("models/demo.onnx")) { 15 | INFOE("models/demo.onnx does not exist, run < python plugin_onnx_export.py > to generate demo.onnx."); 16 | return; 17 | } 18 | 19 | INFOW("onnx to trtmodel..."); 20 | TRTBuilder::compileTRT( 21 | TRTBuilder::TRTMode_FP32, {}, 4, 22 | TRTBuilder::ModelSource("models/demo.onnx"), 23 | "models/demo.fp32.trtmodel", 24 | {TRTBuilder::InputDims(3, 5, 5), TRTBuilder::InputDims(3, 5, 5)} 25 | ); 26 | INFO("done."); 27 | 28 | INFO("load model: models/demo.fp32.trtmodel"); 29 | auto engine = TRTInfer::loadEngine("models/demo.fp32.trtmodel"); 30 | if (!engine) { 31 | INFO("can not load model."); 32 | return; 33 | } 34 | 35 | INFO("forward..."); 36 | 37 | engine->input(0)->setTo(0.25); 38 | engine->input(1)->setTo(0); 39 | engine->forward(); 40 | auto output = engine->output(0); 41 | output->print(); 42 | INFO("done."); 43 | } 44 | }; -------------------------------------------------------------------------------- /src/import_lib.cpp: -------------------------------------------------------------------------------- 1 |  2 | 3 | //link OpenCV, choosing the library according to the build configuration 4 | #if defined(_DEBUG) 5 | # pragma comment(lib, "opencv_world346d.lib") 6 | #else 7 | # pragma comment(lib, "opencv_world346.lib") 8 | #endif 9 | 10 | //link CUDA 11 | #pragma comment(lib, "cuda.lib") 12 | #pragma comment(lib, "cudart.lib") 13 | #pragma comment(lib, "cublas.lib") 14 | #pragma comment(lib, "cudnn.lib") 15 | 16 | //link TensorRT 17 | #pragma comment(lib, "nvinfer.lib") 18 | #pragma comment(lib, "nvinfer_plugin.lib") 19 | #pragma comment(lib, "nvparsers.lib") 20 | 21 | #if defined(_DEBUG) 22 | #pragma comment(lib, "libprotobufd.lib") 23 | #else 24 | #pragma comment(lib, "libprotobuf.lib") 25 | #endif 26 | //#pragma comment(lib, "nvonnxparser.lib") -------------------------------------------------------------------------------- /src/infer/ct_detect_backend.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "ct_detect_backend.hpp" 4 | #include 5 | #include 6 | 7 | namespace TRTInfer { 8 | 9 |
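	// CTDetectBackend decodes the CenterNet heads (hm, hm_pool, wh, reg) into bounding boxes directly on the GPU: the kernel below appends hits to a fixed-size per-image buffer guarded by an atomic counter, and forwardGPU copies the results back asynchronously on the backend stream.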
CTDetectBackend::CTDetectBackend(CUStream stream) :Backend(stream){} 10 | 11 | static __global__ void CTDetectBackend_forwardGPU(float* hm, float* hmpool, float* wh, float* reg, int* countptr, ccutil::BBox* boxptr, int width, int height, int w_x_h, int channels, int stride, float threshold, 12 | int maxobjs, int imageWidth, int imageHeight, float scale, int edge) { 13 | 14 | KERNEL_POSITION; 15 | 16 | float confidence = hm[position]; 17 | if (confidence != hmpool[position] || confidence < threshold) 18 | return; 19 | 20 | int index = atomicAdd(countptr, 1); 21 | if (index >= maxobjs) 22 | return; 23 | 24 | int channel_index = position / w_x_h; 25 | int classes = channel_index; 26 | int offsetChannel0 = position - channel_index * w_x_h; 27 | int offsetChannel1 = offsetChannel0 + w_x_h; 28 | 29 | int cx = offsetChannel0 % width; 30 | int cy = offsetChannel0 / width; 31 | 32 | ccutil::BBox* ptr = boxptr + index; 33 | float dx = reg[offsetChannel0]; 34 | float dy = reg[offsetChannel1]; 35 | float dw = wh[offsetChannel0]; 36 | float dh = wh[offsetChannel1]; 37 | 38 | ptr->x = ((cx + dx - dw * 0.5 - width * 0.5) * stride) / scale + imageWidth * 0.5; 39 | ptr->y = ((cy + dy - dh * 0.5 - height * 0.5) * stride) / scale + imageHeight * 0.5; 40 | ptr->r = ((cx + dx + dw * 0.5 - width * 0.5) * stride) / scale + imageWidth * 0.5; 41 | ptr->b = ((cy + dy + dh * 0.5 - height * 0.5) * stride) / scale + imageHeight * 0.5; 42 | ptr->score = confidence; 43 | ptr->label = classes; 44 | } 45 | 46 | const std::vector>& CTDetectBackend::forwardGPU(std::shared_ptr hm, std::shared_ptr hmpool, std::shared_ptr wh, 47 | std::shared_ptr reg, const std::vector& imageSize, float threshold, int maxobjs) { 48 | 49 | int count = hm->count(1); // w * h * c 50 | int width = hm->width(); 51 | int height = hm->height(); 52 | int batchSize = hm->num(); 53 | int channels = hm->channel(); 54 | int stride = 4; 55 | auto grid = gridDims(count); 56 | auto block = blockDims(count); 57 | 58 | size_t objsStoreSize = maxobjs * sizeof(ccutil::BBox) + sizeof(int); 59 | int heatmapArea = width * height; 60 | void* cpuPtr = getCPUMemory(objsStoreSize * batchSize); 61 | char* cpuPtrInput = (char*)cpuPtr; 62 | void* gpuPtr = getGPUMemory(objsStoreSize * batchSize); 63 | char* gpuPtrInput = (char*)gpuPtr; 64 | auto stream = getStream(); 65 | 66 | for (int i = 0; i < batchSize; ++i) { 67 | 68 | auto& imsize = imageSize[i]; 69 | float sw = width * stride / (float)imsize.width; 70 | float sh = height * stride / (float)imsize.height; 71 | float scale = std::min(sw, sh); 72 | 73 | float* hm_ptr = hm->gpu(i); 74 | float* hm_pool_ptr = hmpool->gpu(i); 75 | float* wh_ptr = wh->gpu(i); 76 | float* reg_ptr = reg->gpu(i); 77 | 78 | int* countPtr = (int*)gpuPtrInput; 79 | ccutil::BBox* boxPtr = (ccutil::BBox*)((char*)gpuPtrInput + sizeof(int)); 80 | 81 | cudaMemsetAsync(gpuPtrInput, 0, sizeof(int), stream); 82 | CTDetectBackend_forwardGPU <<< grid, block, 0, stream >>> (hm_ptr, hm_pool_ptr, wh_ptr, reg_ptr, countPtr, boxPtr, 83 | width, height, heatmapArea, channels, stride, threshold, maxobjs, imsize.width, imsize.height, scale, count); 84 | 85 | cudaMemcpyAsync(cpuPtrInput, gpuPtrInput, objsStoreSize, cudaMemcpyKind::cudaMemcpyDeviceToHost, stream); 86 | 87 | cpuPtrInput += objsStoreSize; 88 | gpuPtrInput += objsStoreSize; 89 | } 90 | cudaStreamSynchronize(stream); 91 | 92 | cpuPtrInput = (char*)cpuPtr; 93 | outputs_.resize(batchSize); 94 | 95 | for (int i = 0; i < batchSize; ++i, cpuPtrInput += objsStoreSize) { 96 | auto& output = outputs_[i]; 97 | 
output.clear(); 98 | 99 | int num = *((int*)cpuPtrInput); 100 | num = std::min(num, maxobjs); 101 | 102 | if (num == 0) 103 | continue; 104 | 105 | ccutil::BBox* ptr = (ccutil::BBox*)(cpuPtrInput + sizeof(int)); 106 | output.insert(output.begin(), ptr, ptr + num); 107 | } 108 | return outputs_; 109 | } 110 | }; -------------------------------------------------------------------------------- /src/infer/ct_detect_backend.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CT_DETECT_BACKEND_HPP 3 | #define CT_DETECT_BACKEND_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include "trt_backend.hpp" 9 | 10 | namespace TRTInfer { 11 | 12 | class CTDetectBackend : public Backend{ 13 | public: 14 | CTDetectBackend(CUStream stream = nullptr); 15 | 16 | const std::vector>& forwardGPU(std::shared_ptr hm, std::shared_ptr hmpool, std::shared_ptr wh, std::shared_ptr reg, 17 | const std::vector& imageSize, float threshold, int maxobjs); 18 | 19 | private: 20 | std::vector> outputs_; 21 | }; 22 | }; 23 | 24 | #endif // CT_DETECT_BACKEND_HPP -------------------------------------------------------------------------------- /src/infer/dbface_backend.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "dbface_backend.hpp" 4 | #include 5 | #include 6 | 7 | namespace TRTInfer { 8 | 9 | DBFaceBackend::DBFaceBackend(CUStream stream):Backend(stream){} 10 | 11 | static __device__ float commonExp(float value) { 12 | 13 | float gate = 1.0f; 14 | if (fabs(value) < gate) 15 | return value * exp(gate); 16 | 17 | if (value > 0) 18 | return exp(value); 19 | else 20 | return -exp(-value); 21 | } 22 | 23 | static __global__ void DBFaceBackend_forwardGPU(float* hm, float* hmpool, float* tlrb, float* landmark, int* countptr, FaceBox* boxptr, 24 | int width, int height, int w_x_h, int stride, float threshold, 25 | int maxobjs, int edge) { 26 | 27 | KERNEL_POSITION; 28 | 29 | float confidence = hm[position]; 30 | if (confidence != hmpool[position] || confidence < threshold) 31 | return; 32 | 33 | int index = atomicAdd(countptr, 1); 34 | if (index >= maxobjs) 35 | return; 36 | 37 | int cx = position % width; 38 | int cy = position / width; 39 | int oc0 = position; 40 | int oc1 = position + w_x_h; 41 | int oc2 = position + w_x_h * 2; 42 | int oc3 = position + w_x_h * 3; 43 | 44 | FaceBox* ptr = boxptr + index; 45 | float dx = tlrb[oc0]; 46 | float dy = tlrb[oc1]; 47 | float dr = tlrb[oc2]; 48 | float db = tlrb[oc3]; 49 | 50 | ptr->x = (cx - dx) * stride; 51 | ptr->y = (cy - dy) * stride; 52 | ptr->r = (cx + dr) * stride; 53 | ptr->b = (cy + db) * stride; 54 | ptr->score = confidence; 55 | ptr->label = 0; 56 | 57 | for (int k = 0; k < 5; ++k) { 58 | // xxxxx yyyyy 59 | float landmark_x = landmark[position + w_x_h * k] * 4; 60 | float landmark_y = landmark[position + w_x_h * (k + 5)] * 4; 61 | 62 | cv::Point2f& point = ptr->landmark[k]; 63 | point.x = (commonExp(landmark_x) + cx) * stride; 64 | point.y = (commonExp(landmark_y) + cy) * stride; 65 | } 66 | } 67 | 68 | const std::vector>& DBFaceBackend::forwardGPU(std::shared_ptr hm, std::shared_ptr hmpool, std::shared_ptr tlrb, 69 | std::shared_ptr landmark, float threshold, int maxobjs) { 70 | 71 | int width = hm->width(); 72 | int height = hm->height(); 73 | int batchSize = hm->num(); 74 | int count = hm->count(1); // c * h * w 75 | auto grid = gridDims(count); 76 | auto block = blockDims(count); 77 | 78 | size_t objsStoreSize = maxobjs * sizeof(FaceBox) + sizeof(int); 79 | int 
heatmapArea = width * height; 80 | void* cpuPtr = getCPUMemory(objsStoreSize * batchSize); 81 | char* cpuPtrInput = (char*)cpuPtr; 82 | void* gpuPtr = getGPUMemory(objsStoreSize * batchSize); 83 | char* gpuPtrInput = (char*)gpuPtr; 84 | int stride = 4; 85 | auto stream = getStream(); 86 | 87 | for (int i = 0; i < batchSize; ++i) { 88 | 89 | float* hm_ptr = hm->gpu(i); 90 | float* hm_pool_ptr = hmpool->gpu(i); 91 | float* tlrb_ptr = tlrb->gpu(i); 92 | float* landmark_ptr = landmark->gpu(i); 93 | 94 | int* countPtr = (int*)gpuPtrInput; 95 | FaceBox* boxPtr = (FaceBox*)((char*)gpuPtrInput + sizeof(int)); 96 | 97 | cudaMemsetAsync(gpuPtrInput, 0, sizeof(int), stream); 98 | DBFaceBackend_forwardGPU <<< grid, block, 0, stream >>> (hm_ptr, hm_pool_ptr, tlrb_ptr, landmark_ptr, countPtr, boxPtr, 99 | width, height, heatmapArea, stride, threshold, maxobjs, count); 100 | 101 | cudaMemcpyAsync(cpuPtrInput, gpuPtrInput, objsStoreSize, cudaMemcpyKind::cudaMemcpyDeviceToHost, stream); 102 | cpuPtrInput += objsStoreSize; 103 | gpuPtrInput += objsStoreSize; 104 | } 105 | cudaStreamSynchronize(stream); 106 | 107 | cpuPtrInput = (char*)cpuPtr; 108 | outputs_.resize(batchSize); 109 | 110 | for (int i = 0; i < batchSize; ++i, cpuPtrInput += objsStoreSize) { 111 | auto& output = outputs_[i]; 112 | output.clear(); 113 | 114 | int num = *((int*)cpuPtrInput); 115 | num = std::min(num, maxobjs); 116 | 117 | if (num == 0) 118 | continue; 119 | 120 | FaceBox* ptr = (FaceBox*)(cpuPtrInput + sizeof(int)); 121 | output.insert(output.begin(), ptr, ptr + num); 122 | } 123 | return outputs_; 124 | } 125 | }; -------------------------------------------------------------------------------- /src/infer/dbface_backend.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DBFACE_BACKEND_HPP 3 | #define DBFACE_BACKEND_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include "trt_backend.hpp" 9 | 10 | namespace TRTInfer { 11 | 12 | struct FaceBox : ccutil::BBox { 13 | cv::Point2f landmark[5]; 14 | 15 | FaceBox() {} 16 | FaceBox(const ccutil::BBox& other):ccutil::BBox(other) {} 17 | }; 18 | 19 | class DBFaceBackend : public Backend{ 20 | public: 21 | DBFaceBackend(CUStream stream = nullptr); 22 | 23 | const std::vector>& forwardGPU( 24 | std::shared_ptr hm, std::shared_ptr hmpool, std::shared_ptr tlrb, std::shared_ptr landmark, 25 | float threshold, int maxobjs = 100); 26 | 27 | private: 28 | std::vector> outputs_; 29 | }; 30 | }; 31 | 32 | #endif // DBFACE_BACKEND_HPP -------------------------------------------------------------------------------- /src/infer/task_pool.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/infer/task_pool.hpp -------------------------------------------------------------------------------- /src/infer/trt_backend.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "trt_backend.hpp" 4 | #include 5 | #include 6 | 7 | #define cuCheck(op) Assert((op) == cudaSuccess) 8 | 9 | namespace TRTInfer { 10 | 11 | Backend::Backend(CUStream stream) { 12 | 13 | this->stream_ = stream; 14 | this->ownStream_ = false; 15 | 16 | if (stream == nullptr) { 17 | cuCheck(cudaStreamCreate(&stream_)); 18 | ownStream_ = true; 19 | } 20 | } 21 | 22 | void* Backend::getCPUMemory(size_t size) { 23 | if (cpuMemSize_ >= size) 24 | return cpuMemory_; 25 | 26 | releaseCPUMemory(); 27 | cpuMemSize_ 
= size; 28 | cpuMemory_ = malloc(size); 29 | Assert(cpuMemory_ != nullptr); 30 | return cpuMemory_; 31 | } 32 | 33 | CUStream Backend::getStream() const { 34 | return stream_; 35 | } 36 | 37 | void Backend::releaseCPUMemory() { 38 | if (cpuMemory_) { 39 | free(cpuMemory_); 40 | cpuMemory_ = nullptr; 41 | cpuMemSize_ = 0; 42 | } 43 | } 44 | 45 | void* Backend::getGPUMemory(size_t size) { 46 | if (gpuMemSize_ >= size) 47 | return gpuMemory_; 48 | 49 | releaseGPUMemory(); 50 | gpuMemSize_ = size; 51 | 52 | cuCheck(cudaMalloc(&gpuMemory_, gpuMemSize_)); 53 | return gpuMemory_; 54 | } 55 | 56 | void Backend::releaseGPUMemory() { 57 | if (gpuMemory_) { 58 | cudaFree(gpuMemory_); 59 | gpuMemory_ = nullptr; 60 | gpuMemSize_ = 0; 61 | } 62 | } 63 | 64 | Backend::~Backend() { 65 | releaseGPUMemory(); 66 | releaseCPUMemory(); 67 | 68 | if (ownStream_) { 69 | cudaStreamDestroy(stream_); 70 | } 71 | stream_ = nullptr; 72 | } 73 | }; -------------------------------------------------------------------------------- /src/infer/trt_backend.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TRT_BACKEND_HPP 3 | #define TRT_BACKEND_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | #include "trt_infer.hpp" 9 | 10 | namespace TRTInfer { 11 | 12 | class Backend { 13 | public: 14 | Backend(CUStream stream = nullptr); 15 | virtual ~Backend(); 16 | 17 | protected: 18 | void* getCPUMemory(size_t size); 19 | void releaseCPUMemory(); 20 | 21 | void* getGPUMemory(size_t size); 22 | void releaseGPUMemory(); 23 | 24 | CUStream getStream() const; 25 | 26 | private: 27 | void* cpuMemory_ = nullptr; 28 | size_t cpuMemSize_ = 0; 29 | void* gpuMemory_ = nullptr; 30 | size_t gpuMemSize_ = 0; 31 | 32 | CUStream stream_ = nullptr; 33 | bool ownStream_ = false; 34 | }; 35 | }; 36 | 37 | #endif // TRT_BACKEND_HPP -------------------------------------------------------------------------------- /src/infer/trt_infer.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_INFER_HPP 4 | #define TRT_INFER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | class __half; 13 | struct CUstream_st; 14 | 15 | namespace TRTInfer { 16 | 17 | typedef __half halfloat; 18 | typedef CUstream_st* CUStream; 19 | 20 | enum DataHead { 21 | DataHead_InGPU, 22 | DataHead_InCPU 23 | }; 24 | 25 | enum class DataType : int { 26 | dtFloat, 27 | dtHalfloat 28 | }; 29 | 30 | int dataTypeSize(DataType dt); 31 | 32 | class Tensor { 33 | public: 34 | Tensor(); 35 | Tensor(const Tensor& other); 36 | Tensor(int n, int c, int h, int w, DataType dtType = DataType::dtFloat); 37 | Tensor(int ndims, const int* dims, DataType dtType = DataType::dtFloat); 38 | Tensor(const std::vector& dims, DataType dtType = DataType::dtFloat); 39 | Tensor& operator = (const Tensor& other); 40 | virtual ~Tensor(); 41 | 42 | inline DataType type() const { return dtType_; } 43 | inline std::vector dims() const { return {num_, channel_, height_, width_}; } 44 | inline cv::Size size() const { return cv::Size(width_, height_); } 45 | inline int offset(int n = 0, int c = 0, int h = 0, int w = 0) { return ((n * this->channel_ + c) * this->height_ + h) * this->width_ + w; } 46 | inline int num() const { return num_; } 47 | inline int channel() const { return channel_; } 48 | inline int height() const { return height_; } 49 | inline int width() const { return width_; } 50 | inline int bytes() const { return bytes_; } 51 | inline int bytes(int start_axis) const 
{ return count(start_axis) * elementSize(); } 52 | inline int elementSize() const { return dataTypeSize(dtType_); } 53 | 54 | void copyFrom(const Tensor& other); 55 | void release(); 56 | void setTo(float value); 57 | void setRandom(float low = 0, float high = 1); 58 | bool empty(); 59 | void resize(int n, int c = -1, int h = -1, int w = -1); 60 | void resize(int ndims, const int* dims); 61 | void resize(const std::vector& dims); 62 | void reshape(int n, int c = -1, int h = -1, int w = -1); 63 | void reshapeLike(const Tensor& other); 64 | void resizeLike(const Tensor& other); 65 | size_t count(int start_axis = 0) const; 66 | void print(); 67 | const char* shapeString(); 68 | 69 | void toGPU(bool copyedIfCPU = true); 70 | void toCPU(bool copyedIfGPU = true); 71 | void toHalf(); 72 | void toFloat(); 73 | 74 | inline const void* cpu() const { ((Tensor*)this)->toCPU(); return host_; } 75 | inline const void* gpu() const { ((Tensor*)this)->toGPU(); return device_; } 76 | inline void* cpu() { toCPU(); return host_; } 77 | inline void* gpu() { toGPU(); return device_; } 78 | 79 | template inline const DataT* cpu() const { ((Tensor*)this)->toCPU(); return (const DataT*)host_; } 80 | template inline const DataT* gpu() const { ((Tensor*)this)->toGPU(); return (const DataT*)device_; } 81 | template inline DataT* cpu() { toCPU(); return (DataT*)host_; } 82 | template inline DataT* gpu() { toGPU(); return (DataT*)device_; } 83 | template inline DataT* cpu(int n, int c = 0, int h = 0, int w = 0) { return cpu() + offset(n, c, h, w); } 84 | template inline DataT* gpu(int n, int c = 0, int h = 0, int w = 0) { return gpu() + offset(n, c, h, w); } 85 | template inline float& at(int n = 0, int c = 0, int h = 0, int w = 0) { return *(cpu() + offset(n, c, h, w)); } 86 | inline cv::Mat atMat(int n = 0, int c = 0) { return cv::Mat(height_, width_, CV_32F, cpu(n, c)); } 87 | 88 | void setMat(int n, const cv::Mat& image); 89 | 90 | void setNormMat(int n, const cv::Mat& image, float mean[3], float std[3]); 91 | void setNormMatGPU(int n, const cv::Mat& image, float mean[3], float std[3]); 92 | 93 | //result = (image - mean) * scale 94 | void setMatMeanScale(int n, const cv::Mat& image, float mean[3], float scale = 1.0f); 95 | Tensor transpose(int axis0, int axis1, int axis2, int axis3); 96 | void transposeInplace(int axis0, int axis1, int axis2, int axis3); 97 | void from(const void* ptr, int n, int c = 1, int h = 1, int w = 1, DataType dtType = DataType::dtFloat); 98 | 99 | void* getTempGPUMemory(size_t size); 100 | void releaseTempGPUMemory(); 101 | 102 | private: 103 | int num_ = 0, channel_ = 0, height_ = 0, width_ = 0; 104 | void* device_ = nullptr; 105 | void* host_ = nullptr; 106 | size_t capacity_ = 0; 107 | size_t bytes_ = 0; 108 | DataHead head_ = DataHead_InCPU; 109 | DataType dtType_ = DataType::dtFloat; 110 | char shapeString_[100]; 111 | void* tempGPUMemory_ = nullptr; 112 | size_t tempGPUMemoryLength = 0; 113 | }; 114 | 115 | class Engine { 116 | public: 117 | virtual bool load(const std::string& file) = 0; 118 | virtual void destroy() = 0; 119 | virtual void forward(bool sync = true) = 0; 120 | virtual int maxBatchSize() = 0; 121 | virtual CUStream getCUStream() = 0; 122 | virtual void synchronize() = 0; 123 | 124 | virtual std::shared_ptr input(int index = 0) = 0; 125 | virtual std::shared_ptr output(int index = 0) = 0; 126 | virtual std::shared_ptr tensor(const std::string& name) = 0; 127 | }; 128 | 129 | void setDevice(int device_id); 130 | std::shared_ptr loadEngine(const std::string& file); 131 | 
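	// Typical use of loadEngine (see src/examples/onnx.cpp): load a .trtmodel produced by TRTBuilder::compileTRT, fill the input tensors, call forward(), then read the output tensors.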
bool initNVPlugins(); 132 | }; //TRTInfer 133 | 134 | 135 | #endif //TRT_INFER_HPP -------------------------------------------------------------------------------- /src/infer/trt_infer_norm.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "trt_infer.hpp" 4 | #include 5 | 6 | namespace TRTInfer { 7 | static __global__ void ImageNormMeanStd_forwardGPU_impl(float* d0, float* d1, float* d2, float m0, float m1, float m2, float std0, float std1, float std2, unsigned char* src, int edge) { 8 | 9 | KERNEL_POSITION; 10 | 11 | float* pptr_dest[3] = {d0, d1, d2}; 12 | float mean[3] = {m0, m1, m2}; 13 | float std[3] = {std0, std1, std2}; 14 | 15 | int pos_real = position * 3; 16 | unsigned char* src_real = src + pos_real; 17 | for (int c = 0; c < 3; ++c) { 18 | unsigned char value = src_real[c]; 19 | float* dest_ptr = pptr_dest[c]; 20 | dest_ptr[position] = (value / 255.0f - mean[c]) / std[c]; 21 | } 22 | } 23 | 24 | void ImageNormMeanStd_forwardGPU(float* d0, float* d1, float* d2, float mean[3], float std[3], unsigned char* src, int nump, cudaStream_t stream) { 25 | 26 | auto grid = gridDims(nump); 27 | auto block = blockDims(nump); 28 | 29 | if (stream) { 30 | ImageNormMeanStd_forwardGPU_impl << > > (d0, d1, d2, mean[0], mean[1], mean[2], std[0], std[1], std[2], src, nump); 31 | } 32 | else { 33 | ImageNormMeanStd_forwardGPU_impl << > > (d0, d1, d2, mean[0], mean[1], mean[2], std[0], std[1], std[2], src, nump); 34 | } 35 | } 36 | }; -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 |  2 | 3 | #include 4 | #include "infer/trt_infer.hpp" 5 | 6 | namespace examples { 7 | 8 | // src/examples/*.cpp 9 | void center_net_coco2x_dcn(); 10 | void center_track_coco_tracking(); 11 | void dbface(); 12 | void onnx(); 13 | }; 14 | 15 | int main() { 16 | 17 | //save logs to file 18 | ccutil::setLoggerSaveDirectory("logs"); 19 | TRTInfer::setDevice(0); 20 | 21 | examples::onnx(); 22 | examples::dbface(); 23 | examples::center_net_coco2x_dcn(); 24 | examples::center_track_coco_tracking(); 25 | return 0; 26 | } -------------------------------------------------------------------------------- /src/onnx/onnx_pb.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) ONNX Project Contributors. 2 | // Licensed under the MIT license. 3 | 4 | #ifndef ONNX_ONNX_PB_H 5 | #define ONNX_ONNX_PB_H 6 | 7 | // Defines ONNX_EXPORT and ONNX_IMPORT. On Windows, this corresponds to 8 | // different declarations (dllexport and dllimport). On Linux/Mac, it just 9 | // resolves to the same "default visibility" setting. 10 | #if defined(_MSC_VER) 11 | #if defined(ONNX_BUILD_SHARED_LIBS) || defined(ONNX_BUILD_MAIN_LIB) 12 | #define ONNX_EXPORT __declspec(dllexport) 13 | #define ONNX_IMPORT __declspec(dllimport) 14 | #else 15 | #define ONNX_EXPORT 16 | #define ONNX_IMPORT 17 | #endif 18 | #else 19 | #if defined(__GNUC__) 20 | #define ONNX_EXPORT __attribute__((__visibility__("default"))) 21 | #else 22 | #define ONNX_EXPORT 23 | #endif 24 | #define ONNX_IMPORT ONNX_EXPORT 25 | #endif 26 | 27 | // ONNX_API is a macro that, depends on whether you are building the 28 | // main ONNX library or not, resolves to either ONNX_EXPORT or 29 | // ONNX_IMPORT. 30 | // 31 | // This is used in e.g. 
ONNX's protobuf files: when building the main library, 32 | // it is defined as ONNX_EXPORT to fix a Windows global-variable-in-dll 33 | // issue, and for anyone dependent on ONNX it will be defined as 34 | // ONNX_IMPORT. ONNX_BUILD_MAIN_LIB can also be set when being built 35 | // statically if ONNX is being linked into a shared library that wants 36 | // to export the ONNX APIs and classes. 37 | // 38 | // More details on Windows dllimport / dllexport can be found at 39 | // https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx 40 | // 41 | // This solution is similar to 42 | // https://github.com/pytorch/pytorch/blob/master/caffe2/core/common.h 43 | #define ONNX_API 44 | #include "onnx_ONNX_NAMESPACE-ml.pb.h" 45 | 46 | #endif // ! ONNX_ONNX_PB_H 47 | -------------------------------------------------------------------------------- /src/onnx_parser/ImporterContext.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include "onnx2trt.hpp" 26 | 27 | #include 28 | #include 29 | 30 | namespace onnx2trt { 31 | 32 | class ImporterContext final : public IImporterContext { 33 | nvinfer1::INetworkDefinition* _network; 34 | nvinfer1::ILogger* _logger; 35 | std::list _owned_plugin_instances; 36 | std::list> _temp_bufs; 37 | std::unordered_map _user_inputs; 38 | std::unordered_map _user_outputs; 39 | std::unordered_map _opsets; 40 | public: 41 | ImporterContext(nvinfer1::INetworkDefinition* network, 42 | nvinfer1::ILogger* logger) 43 | : _network(network), _logger(logger) {} 44 | virtual nvinfer1::INetworkDefinition* network() override { return _network; } 45 | nvinfer1::ILogger& logger() { return *_logger; } 46 | virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, 47 | nvinfer1::Dims shape) override { 48 | ShapedWeights weights(type, nullptr, shape); 49 | _temp_bufs.push_back(std::vector(weights.size_bytes())); 50 | weights.values = _temp_bufs.back().data(); 51 | return weights; 52 | } 53 | virtual nvinfer1::IPluginV2Layer* addPluginV2(PluginV2* plugin, 54 | std::vector const& inputs) override { 55 | // Note: Plugins are wrapped here to make them work with 56 | // onnx2trt::PluginFactory. 
57 | auto* wrapped_plugin = new TypeSerializingPlugin(plugin); 58 | _owned_plugin_instances.emplace_back(wrapped_plugin); 59 | #if NV_TENSORRT_MAJOR > 4 60 | return _network->addPluginV2(inputs.data(), inputs.size(), *wrapped_plugin); 61 | #endif 62 | } 63 | bool setUserInput(const char* name, nvinfer1::ITensor* input) { 64 | _user_inputs[name] = input; 65 | return true; 66 | } 67 | bool setUserOutput(const char* name, nvinfer1::ITensor** output) { 68 | _user_outputs[name] = output; 69 | return true; 70 | } 71 | nvinfer1::ITensor* getUserInput(const char* name) { 72 | if( !_user_inputs.count(name) ) { 73 | return nullptr; 74 | } else { 75 | return _user_inputs.at(name); 76 | } 77 | } 78 | nvinfer1::ITensor** getUserOutput(const char* name) { 79 | if( !_user_outputs.count(name) ) { 80 | return nullptr; 81 | } else { 82 | return _user_outputs.at(name); 83 | } 84 | } 85 | std::unordered_map const& getUserOutputs() const { 86 | return _user_outputs; 87 | } 88 | void clearOpsets() { _opsets.clear(); } 89 | void addOpset(std::string domain, int64_t version) { 90 | _opsets.emplace(domain, version); 91 | } 92 | virtual int64_t getOpsetVersion(const char* domain="") const override { 93 | if (_opsets.empty()) { 94 | return 1; 95 | } else if (_opsets.size() == 1) { 96 | return _opsets.begin()->second; 97 | } else { 98 | assert(_opsets.count(domain)); 99 | return _opsets.at(domain); 100 | } 101 | } 102 | }; 103 | 104 | } // namespace onnx2trt 105 | -------------------------------------------------------------------------------- /src/onnx_parser/InstanceNormalization.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #pragma once 24 | #include 25 | 26 | #include "plugin.hpp" 27 | #include "serialize.hpp" 28 | #include 29 | #include 30 | 31 | typedef unsigned short half_type; 32 | 33 | namespace { 34 | constexpr const char* INSTANCE_PLUGIN_VERSION{"001"}; 35 | constexpr const char* INSTANCE_PLUGIN_NAME{"InstanceNormalization"}; 36 | } 37 | 38 | class InstanceNormalizationPlugin final : public onnx2trt::PluginV2 { 39 | float _epsilon; 40 | int _nchan; 41 | std::vector _h_scale; 42 | std::vector _h_bias; 43 | float* _d_scale; 44 | float* _d_bias; 45 | bool _initialized; 46 | nvinfer1::Weights _scale, _bias; 47 | cudnnHandle_t _cudnn_handle; 48 | cudnnTensorDescriptor_t _x_desc, _y_desc, _b_desc; 49 | protected: 50 | void deserialize(void const* serialData, size_t serialLength) { 51 | deserializeBase(serialData, serialLength); 52 | deserialize_value(&serialData, &serialLength, &_epsilon); 53 | deserialize_value(&serialData, &serialLength, &_nchan); 54 | deserialize_value(&serialData, &serialLength, &_h_scale); 55 | deserialize_value(&serialData, &serialLength, &_h_bias); 56 | } 57 | size_t getSerializationSize() const override { 58 | return (serialized_size(_epsilon) + 59 | serialized_size(_nchan) + 60 | serialized_size(_h_scale) + 61 | serialized_size(_h_bias)) + getBaseSerializationSize(); 62 | } 63 | void serialize(void *buffer) const override { 64 | serializeBase(buffer); 65 | serialize_value(&buffer, _epsilon); 66 | serialize_value(&buffer, _nchan); 67 | serialize_value(&buffer, _h_scale); 68 | serialize_value(&buffer, _h_bias); 69 | } 70 | public: 71 | InstanceNormalizationPlugin(float epsilon, 72 | nvinfer1::Weights const& scale, 73 | nvinfer1::Weights const& bias); 74 | InstanceNormalizationPlugin(void const* serialData, size_t serialLength) : _initialized(false) { 75 | this->deserialize(serialData, serialLength); 76 | } 77 | const char* getPluginType() const override { return INSTANCE_PLUGIN_NAME; } 78 | 79 | virtual void destroy() override { delete this; } 80 | 81 | virtual nvinfer1::IPluginV2* clone() const override { return new InstanceNormalizationPlugin{_epsilon, _scale, _bias}; } 82 | 83 | virtual const char* getPluginVersion() const override { return INSTANCE_PLUGIN_VERSION; } 84 | 85 | virtual void setPluginNamespace(const char* pluginNamespace) override {} 86 | 87 | virtual const char* getPluginNamespace() const override { return ""; } 88 | 89 | bool supportsFormat(nvinfer1::DataType type, 90 | nvinfer1::PluginFormat format) const override; 91 | int getNbOutputs() const override { return 1; } 92 | nvinfer1::Dims getOutputDimensions(int index, 93 | const nvinfer1::Dims *inputDims, 94 | int nbInputs) override { 95 | assert(index == 0); 96 | assert(inputDims); 97 | assert(nbInputs == 1); 98 | return *inputDims; 99 | } 100 | int initialize() override; 101 | void terminate() override; 102 | int enqueue(int batchSize, 103 | const void *const *inputs, void **outputs, 104 | void *workspace, cudaStream_t stream) override; 105 | size_t getWorkspaceSize(int maxBatchSize) const override; 106 | ~InstanceNormalizationPlugin(); 107 | }; 108 | 109 | class InstanceNormalizationPluginCreator : public nvinfer1::IPluginCreator 110 | { 111 | public: 112 | InstanceNormalizationPluginCreator() {} 113 | 114 | ~InstanceNormalizationPluginCreator() {} 115 | 116 | const char* getPluginName() const { return INSTANCE_PLUGIN_NAME; } 117 | 118 | const char* getPluginVersion() const { return INSTANCE_PLUGIN_VERSION; } 119 | 120 | const nvinfer1::PluginFieldCollection* getFieldNames() { std::cerr<< 
"Function not implemented" << std::endl; return nullptr; } 121 | 122 | nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) { std::cerr<< "Function not implemented" << std::endl; return nullptr; } 123 | 124 | nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { return new InstanceNormalizationPlugin{serialData, serialLength}; } 125 | 126 | void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; } 127 | 128 | const char* getPluginNamespace() const { return mNamespace.c_str(); } 129 | private: 130 | std::string mNamespace; 131 | }; 132 | 133 | REGISTER_TENSORRT_PLUGIN(InstanceNormalizationPluginCreator); 134 | -------------------------------------------------------------------------------- /src/onnx_parser/ModelImporter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #pragma once 24 | 25 | #include "NvOnnxParser.h" 26 | #include "ImporterContext.hpp" 27 | #include "common.hpp" 28 | #include "utils.hpp" 29 | #include "builtin_op_importers.hpp" 30 | 31 | namespace onnx2trt { 32 | 33 | class ModelImporter final : public nvonnxparser::IParser { 34 | string_map _op_importers; 35 | ImporterContext _importer_ctx; 36 | std::list<::ONNX_NAMESPACE::ModelProto> _onnx_models; // Needed for ownership of weights 37 | int _current_node; 38 | std::vector _errors; 39 | std::vector _inputDims; 40 | 41 | Status importModel(::ONNX_NAMESPACE::ModelProto const &model, 42 | uint32_t weight_count, 43 | onnxTensorDescriptorV1 const *weight_descriptors); 44 | NodeImportResult importNode(::ONNX_NAMESPACE::NodeProto const& node, 45 | std::vector& inputs, 46 | std::vector& output_names); 47 | public: 48 | ModelImporter(nvinfer1::INetworkDefinition* network, 49 | nvinfer1::ILogger* logger, const std::vector& dims) 50 | : _op_importers(getBuiltinOpImporterMap()), 51 | _importer_ctx(network, logger), _inputDims(dims) {} 52 | bool parseWithWeightDescriptors( 53 | void const *serialized_onnx_model, size_t serialized_onnx_model_size, 54 | uint32_t weight_count, 55 | onnxTensorDescriptorV1 const *weight_descriptors) override; 56 | bool parse(void const *serialized_onnx_model, 57 | size_t serialized_onnx_model_size) override; 58 | bool parseFromFile(const char* onnxModelFile, int verbosity) override; 59 | bool supportsModel(void const *serialized_onnx_model, 60 | size_t serialized_onnx_model_size, 61 | SubGraphCollection_t &sub_graph_collection) override; 62 | 63 | bool supportsOperator(const char* op_name) const override; 64 | void destroy() override { delete this; } 65 | //virtual void registerOpImporter(std::string op, 66 | // NodeImporter const &node_importer) override { 67 | // // Note: This allows existing importers to be replaced 68 | // _op_importers[op] = node_importer; 69 | //} 70 | //virtual Status const &setInput(const char *name, 71 | // nvinfer1::ITensor *input) override; 72 | //virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override; 73 | int getNbErrors() const override { return _errors.size(); } 74 | nvonnxparser::IParserError const* getError(int index) const override { 75 | assert(0 <= index && index < (int)_errors.size()); 76 | return &_errors[index]; 77 | } 78 | void clearErrors() override { _errors.clear(); } 79 | }; 80 | 81 | } // namespace onnx2trt 82 | -------------------------------------------------------------------------------- /src/onnx_parser/NvOnnxParser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "NvOnnxParser.h" 24 | #include "ModelImporter.hpp" 25 | 26 | void* createNvOnnxParser_INTERNAL(void* network_, void* logger_, int version, std::vector inputDims) { 27 | auto network = static_cast(network_); 28 | auto logger = static_cast(logger_); 29 | return new onnx2trt::ModelImporter(network, logger, inputDims); 30 | } 31 | 32 | int getNvOnnxParserVersion() { 33 | return NV_ONNX_PARSER_VERSION; 34 | } 35 | -------------------------------------------------------------------------------- /src/onnx_parser/NvOnnxParserTypedefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | typedef std::size_t NodeProtoUniqueID; 28 | typedef std::vector NodesContainer_t; 29 | typedef std::pair, bool> SubGraph_t; 30 | typedef std::vector SubGraphCollection_t; -------------------------------------------------------------------------------- /src/onnx_parser/OnnxAttrs.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "OnnxAttrs.hpp" 24 | #include "ShapedWeights.hpp" 25 | #include "onnx2trt_utils.hpp" 26 | 27 | template<> float OnnxAttrs::get<float>(std::string key) const { 28 | return this->at(key)->f(); 29 | } 30 | template<> int OnnxAttrs::get<int>(std::string key) const { 31 | return this->at(key)->i(); 32 | } 33 | template<> bool OnnxAttrs::get<bool>(std::string key) const { 34 | int value = this->at(key)->i(); 35 | assert(value == bool(value)); 36 | return bool(value); 37 | } 38 | template<> std::string OnnxAttrs::get<std::string>(std::string key) const { 39 | return this->at(key)->s(); 40 | } 41 | template<> std::vector<int> OnnxAttrs::get<std::vector<int>>(std::string key) const { 42 | auto attr = this->at(key)->ints(); 43 | return std::vector<int>(attr.begin(), attr.end()); 44 | } 45 | 46 | template<> std::vector<int64_t> OnnxAttrs::get<std::vector<int64_t>>(std::string key) const { 47 | auto attr = this->at(key)->ints(); 48 | return std::vector<int64_t>(attr.begin(), attr.end()); 49 | } 50 | 51 | template<> std::vector<float> OnnxAttrs::get<std::vector<float>>(std::string key) const { 52 | auto attr = this->at(key)->floats(); 53 | return std::vector<float>(attr.begin(), attr.end()); 54 | } 55 | template<> nvinfer1::Dims OnnxAttrs::get<nvinfer1::Dims>(std::string key) const { 56 | auto values = this->get<std::vector<int>>(key); 57 | nvinfer1::Dims dims; 58 | dims.nbDims = values.size(); 59 | std::copy(values.begin(), values.end(), dims.d); 60 | // Note: No dimension type information is included 61 | return dims; 62 | } 63 | template<> nvinfer1::DimsHW OnnxAttrs::get<nvinfer1::DimsHW>(std::string key) const { 64 | nvinfer1::Dims dims = this->get<nvinfer1::Dims>(key); 65 | assert(dims.nbDims == 2); 66 | return nvinfer1::DimsHW(dims.d[0], dims.d[1]); 67 | } 68 | template<> nvinfer1::Permutation OnnxAttrs::get<nvinfer1::Permutation>(std::string key) const { 69 | auto values = this->get<std::vector<int>>(key); 70 | nvinfer1::Permutation perm; 71 | std::copy(values.begin(), values.end(), perm.order); 72 | // Fill unused values with identity permutation 73 | for( int i=values.size(); i<nvinfer1::Dims::MAX_DIMS; ++i ) { 74 | perm.order[i] = i; 75 | } 76 | return perm; 77 | } 78 | template<> onnx2trt::ShapedWeights OnnxAttrs::get<onnx2trt::ShapedWeights>(std::string key) const { 79 | ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = this->at(key)->t(); 80 | onnx2trt::ShapedWeights weights; 81 | convert_onnx_weights(onnx_weights_tensor, &weights); 82 | return weights; 83 | } 84 | -------------------------------------------------------------------------------- /src/onnx_parser/OnnxAttrs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | class OnnxAttrs { 31 | template 32 | using string_map = std::unordered_map; 33 | typedef string_map<::ONNX_NAMESPACE::AttributeProto const*> AttrMap; 34 | AttrMap _attrs; 35 | public: 36 | explicit OnnxAttrs(::ONNX_NAMESPACE::NodeProto const& onnx_node) { 37 | for( auto const& attr : onnx_node.attribute() ) { 38 | _attrs.insert({attr.name(), &attr}); 39 | } 40 | } 41 | bool count(std::string key) const { return _attrs.count(key); } 42 | ::ONNX_NAMESPACE::AttributeProto const* at(std::string key) const { 43 | if( !_attrs.count(key) ) { 44 | throw std::out_of_range("Attribute not found: " + key); 45 | } 46 | return _attrs.at(key); 47 | } 48 | template T get(std::string key) const; 49 | template T get(std::string key, T const& default_value) const { 50 | return _attrs.count(key) ? this->get(key) : default_value; 51 | } 52 | }; 53 | -------------------------------------------------------------------------------- /src/onnx_parser/ResizeNearest.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "ResizeNearest.hpp" 27 | 28 | // TODO: Move this to a common header 29 | inline bool is_CHW(nvinfer1::Dims const& dims) { 30 | return (dims.nbDims == 3 && 31 | dims.type[0] == nvinfer1::DimensionType::kCHANNEL && 32 | dims.type[1] == nvinfer1::DimensionType::kSPATIAL && 33 | dims.type[2] == nvinfer1::DimensionType::kSPATIAL); 34 | } 35 | 36 | nvinfer1::Dims ResizeNearestPlugin::getOutputDimensions(int index, 37 | const nvinfer1::Dims *inputDims, 38 | int nbInputs) { 39 | assert(nbInputs == 1); 40 | nvinfer1::Dims const& input = inputDims[0]; 41 | assert(is_CHW(input)); 42 | assert(_ndims == 2); 43 | assert(index == 0); 44 | nvinfer1::Dims output; 45 | output.nbDims = input.nbDims; 46 | int s = 0; 47 | for( int d=0; dgetOutputDimensions(0, &this->getInputDims(0), 1); 60 | assert(is_CHW(this->getInputDims(0))); 61 | assert(is_CHW(_output_dims)); 62 | assert(_ndims == 2); 63 | return 0; 64 | } 65 | 66 | template 67 | __global__ 68 | void resize_nearest_kernel_2d(int nbatch, 69 | float2 scale, 70 | int2 osize, 71 | Data const* idata, int istride, int ibatchstride, 72 | Data* odata, int ostride, int obatchstride) { 73 | int x0 = threadIdx.x + blockIdx.x * blockDim.x; 74 | int y0 = threadIdx.y + blockIdx.y * blockDim.y; 75 | int z0 = blockIdx.z; 76 | for( int batch=z0; batchgetInputDims(0); 92 | int nchan = input_dims.d[0]; 93 | switch( _ndims ) { 94 | case 2: { 95 | float2 scale = {_scale[1], _scale[0]}; 96 | int2 osize = {_output_dims.d[2], _output_dims.d[1]}; 97 | int istride = input_dims.d[2]; 98 | int ostride = _output_dims.d[2]; 99 | int ibatchstride = input_dims.d[1] * istride; 100 | int obatchstride = _output_dims.d[1] * ostride; 101 | dim3 block(32, 16); 102 | dim3 grid((osize.x - 1) / block.x + 1, 103 | (osize.y - 1) / block.y + 1, 104 | std::min(batchSize * nchan, 65535)); 105 | if (getDataType()==nvinfer1::DataType::kFLOAT) { 106 | resize_nearest_kernel_2d<<>> 107 | (batchSize * nchan, scale, osize, 108 | static_cast( inputs[0]), istride, ibatchstride, 109 | static_cast(outputs[0]), ostride, obatchstride); 110 | } else { 111 | resize_nearest_kernel_2d<<>> 112 | (batchSize * nchan, scale, osize, 113 | static_cast<__half const*>( inputs[0]), istride, ibatchstride, 114 | static_cast<__half* >(outputs[0]), ostride, obatchstride); 115 | } 116 | return cudaGetLastError() != cudaSuccess; 117 | } 118 | default: return -1; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/onnx_parser/ResizeNearest.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | #include 25 | 26 | #include "plugin.hpp" 27 | #include "serialize.hpp" 28 | 29 | #include 30 | 31 | namespace { 32 | constexpr const char* RESIZE_PLUGIN_VERSION{"001"}; 33 | constexpr const char* RESIZE_PLUGIN_NAME{"ResizeNearest"}; 34 | } 35 | 36 | class ResizeNearestPlugin final : public onnx2trt::PluginV2 { 37 | int _ndims; 38 | float _scale[nvinfer1::Dims::MAX_DIMS]; 39 | nvinfer1::Dims _output_dims; 40 | protected: 41 | void deserialize(void const* serialData, size_t serialLength) { 42 | deserializeBase(serialData, serialLength); 43 | deserialize_value(&serialData, &serialLength, &_ndims); 44 | deserialize_value(&serialData, &serialLength, &_scale); 45 | } 46 | size_t getSerializationSize() const override { 47 | return serialized_size(_ndims) + serialized_size(_scale) + getBaseSerializationSize(); 48 | } 49 | void serialize(void *buffer) const override { 50 | serializeBase(buffer); 51 | serialize_value(&buffer, _ndims); 52 | serialize_value(&buffer, _scale); 53 | } 54 | public: 55 | ResizeNearestPlugin(std::vector const& scale) 56 | : _ndims(scale.size()) { 57 | assert(scale.size() <= nvinfer1::Dims::MAX_DIMS); 58 | std::copy(scale.begin(), scale.end(), _scale); 59 | } 60 | ResizeNearestPlugin(void const* serialData, size_t serialLength) { 61 | this->deserialize(serialData, serialLength); 62 | } 63 | virtual const char* getPluginType() const override { return RESIZE_PLUGIN_NAME; } 64 | 65 | virtual void destroy() override { delete this; } 66 | 67 | virtual nvinfer1::IPluginV2* clone() const override { return new ResizeNearestPlugin{std::vector(_scale, _scale + _ndims)}; } 68 | 69 | virtual const char* getPluginVersion() const override { return RESIZE_PLUGIN_VERSION; } 70 | 71 | virtual void setPluginNamespace(const char* pluginNamespace) override {} 72 | 73 | virtual const char* getPluginNamespace() const override { return ""; } 74 | 75 | virtual int getNbOutputs() const override { return 1; } 76 | virtual nvinfer1::Dims getOutputDimensions(int index, 77 | const nvinfer1::Dims *inputs, int nbInputDims) override; 78 | virtual int initialize() override; 79 | int enqueue(int batchSize, 80 | const void *const *inputs, void **outputs, 81 | void *workspace, cudaStream_t stream) override; 82 | }; 83 | 84 | class ResizeNearestPluginCreator : public nvinfer1::IPluginCreator 85 | { 86 | public: 87 | ResizeNearestPluginCreator() {} 88 | 89 | ~ResizeNearestPluginCreator() {} 90 | 91 | const char* getPluginName() const { return RESIZE_PLUGIN_NAME; } 92 | 93 | const char* getPluginVersion() const { return RESIZE_PLUGIN_VERSION; } 94 | 95 | const nvinfer1::PluginFieldCollection* getFieldNames() { std::cerr<< "Function not implemented" << std::endl; return nullptr; } 96 | 97 | nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) { std::cerr<< "Function not implemented" << std::endl; return nullptr; } 98 | 99 | nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { 
return new ResizeNearestPlugin{serialData, serialLength}; } 100 | 101 | void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; } 102 | 103 | const char* getPluginNamespace() const { return mNamespace.c_str(); } 104 | private: 105 | std::string mNamespace; 106 | }; 107 | 108 | REGISTER_TENSORRT_PLUGIN(ResizeNearestPluginCreator); 109 | -------------------------------------------------------------------------------- /src/onnx_parser/ShapedWeights.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "ShapedWeights.hpp" 24 | #include "trt_utils.hpp" 25 | #include "onnx2trt_utils.hpp" 26 | 27 | namespace onnx2trt { 28 | 29 | bool convertINT64(void* weightValues, const size_t nbWeights, std::vector& converted_weights) 30 | { 31 | int64_t * weights = static_cast(weightValues); 32 | for (size_t i = 0; i < nbWeights; i++) 33 | { 34 | if (weights[i] > INT32_MAX || weights[i] < INT32_MIN) 35 | { 36 | return false; 37 | } 38 | else 39 | { 40 | converted_weights[i] = static_cast(weights[i]); 41 | } 42 | } 43 | return true; 44 | } 45 | 46 | size_t ShapedWeights::count() const { 47 | if( this->values == nullptr && this->shape.nbDims == 0 ) 48 | { 49 | return 0; 50 | } 51 | else 52 | { 53 | // TRT supports scalars, so 0D tensors should have a count of 1. 54 | size_t c = 1; 55 | for( int i=0; ishape.nbDims; ++i ) 56 | { 57 | c *= this->shape.d[i]; 58 | } 59 | return c; 60 | } 61 | } 62 | 63 | ShapedWeights ShapedWeights::empty(DataType type) { 64 | return ShapedWeights(type, nullptr, nvinfer1::Dims{0}); 65 | } 66 | 67 | ShapedWeights::ShapedWeights() : values(nullptr), shape{0} {} 68 | 69 | ShapedWeights::ShapedWeights(DataType type_, void* values_, nvinfer1::Dims shape_) 70 | : type(type_), values(values_), shape(shape_) { 71 | // Note: this->shape.type[] is not used 72 | } 73 | 74 | size_t ShapedWeights::size_bytes() const { 75 | return this->count() * get_dtype_size(this->type); 76 | } 77 | 78 | ShapedWeights::operator bool() const { 79 | return (bool)this->values; 80 | } 81 | 82 | ShapedWeights::operator nvinfer1::Weights() const { 83 | nvinfer1::Weights w {}; 84 | // If INT64 weights, check if all the values can be cast down to INT32. 
85 | if (this->type == ::ONNX_NAMESPACE::TensorProto::INT64) 86 | { 87 | cout << "WARNING: Your ONNX model has been generated with INT64 weights, " 88 | << "while TensorRT does not natively support INT64. " 89 | << "Attempting to cast down to INT32." << endl; 90 | std::vector<int32_t> int32_weights; 91 | int32_weights.resize(this->count()); 92 | 93 | if (!onnx2trt::convertINT64(this->values, this->count(), int32_weights)) 94 | { 95 | cerr << "ERROR: Weights cannot be cast down to INT32." << endl; 96 | // Return empty w on failure 97 | return w; 98 | } 99 | else 100 | { 101 | void * int32_weights_ptr = static_cast<void *>(int32_weights.data()); 102 | std::memcpy(this->values, int32_weights_ptr, int32_weights.size() * sizeof(int32_t)); 103 | w.values = this->values; 104 | cout << "Successfully casted down to INT32." << endl; 105 | } 106 | } 107 | else 108 | { 109 | w.values = this->values; 110 | } 111 | bool supported_type = convert_dtype(this->type, &w.type); 112 | (void)supported_type; 113 | assert(supported_type); 114 | w.count = this->count(); 115 | return w; 116 | } 117 | 118 | template<typename DType> 119 | void transpose2DWeights(ShapedWeights const& weights, 120 | nvinfer1::Dims const& new_shape, 121 | ShapedWeights* result) { 122 | DType const* src = reinterpret_cast<DType const*>(weights.values); 123 | DType* dst = reinterpret_cast<DType*>(result->values); 124 | int src_stride = weights.shape.d[1]; 125 | int dst_stride = result->shape.d[1]; 126 | for (int i = 0; i < new_shape.d[0]; ++i) { 127 | for (int j = 0; j < new_shape.d[1]; ++j) { 128 | dst[i * dst_stride + j] = src[j * src_stride + i]; 129 | } 130 | } 131 | } 132 | 133 | bool transposeWeights(ShapedWeights const& weights, 134 | nvinfer1::Permutation const& perm, 135 | ShapedWeights* result) { 136 | nvinfer1::Dims shape = weights.shape; 137 | nvinfer1::Dims new_shape; 138 | new_shape.nbDims = shape.nbDims; 139 | for( int d=0; d<shape.nbDims; ++d ) { 140 | new_shape.d[d] = shape.d[perm.order[d]]; 141 | result->shape.d[d] = new_shape.d[d]; 142 | } 143 | // TODO: Need to generalize this transpose implementation 144 | assert(perm.order[0] == 1 && perm.order[1] == 0); 145 | if (shape.nbDims == 2) { 146 | if (weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT) { 147 | transpose2DWeights<float>(weights, new_shape, result); 148 | } else if (weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16) { 149 | transpose2DWeights<uint16_t>(weights, new_shape, result); 150 | } else { 151 | return false; 152 | } 153 | } else { 154 | // TODO: Implement general transposes and multiple data types 155 | // Unsupported weights transpose 156 | return false; 157 | } 158 | return true; 159 | } 160 | 161 | } // namespace onnx2trt 162 | -------------------------------------------------------------------------------- /src/onnx_parser/ShapedWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software.
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | 28 | namespace onnx2trt { 29 | 30 | class ShapedWeights { 31 | public: 32 | typedef int32_t DataType; 33 | DataType type; 34 | void* values; 35 | nvinfer1::Dims shape; 36 | static ShapedWeights empty(DataType type); 37 | ShapedWeights(); 38 | explicit ShapedWeights(DataType type, void* values, nvinfer1::Dims shape_); 39 | size_t count() const; 40 | size_t size_bytes() const; 41 | operator bool() const; 42 | operator nvinfer1::Weights() const; 43 | }; 44 | 45 | bool transposeWeights(ShapedWeights const& weights, 46 | nvinfer1::Permutation const& perm, 47 | ShapedWeights* result); 48 | 49 | bool convertINT64(void * weightValues, const size_t nbWeights, std::vector& converted_weights); 50 | 51 | } // namespace onnx2trt 52 | -------------------------------------------------------------------------------- /src/onnx_parser/Split.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "Split.hpp" 28 | 29 | nvinfer1::Dims SplitPlugin::getOutputDimensions(int index, 30 | const nvinfer1::Dims *inputDims, 31 | int nbInputs) { 32 | assert(nbInputs == 1); 33 | assert(index < this->getNbOutputs()); 34 | nvinfer1::Dims const& input_dims = inputDims[0]; 35 | nvinfer1::Dims output_dims = input_dims; 36 | output_dims.d[_axis] = _output_lengths.at(index); 37 | return output_dims; 38 | } 39 | 40 | int SplitPlugin::initialize() { 41 | std::vector segment_offsets(1, 0); 42 | for( int i=0; igetNbOutputs(); ++i ) { 43 | segment_offsets.push_back(segment_offsets.back() + _output_lengths[i]); 44 | } 45 | _d_segment_offsets = segment_offsets; 46 | nvinfer1::Dims dims = this->getInputDims(0); 47 | _nx = 1; 48 | for( int i=dims.nbDims-1; i>_axis; --i ) { 49 | _nx *= dims.d[i]; 50 | } 51 | _ny = dims.d[_axis]; 52 | _nz = 1; 53 | for( int i=_axis-1; i>=0; --i ) { 54 | _nz *= dims.d[i]; 55 | } 56 | _d_output_ptrs.resize(this->getNbOutputs(), nullptr); 57 | return 0; 58 | } 59 | 60 | template 61 | __device__ 62 | int upper_bound(T const* vals, int n, T const& key) { 63 | int i = 0; 64 | while( n > 0 ) { 65 | int m = n / 2; 66 | int j = i + m; 67 | if( !(key < vals[j]) ) { 68 | i = j + 1; 69 | n -= m + 1; 70 | } else { 71 | n = m; 72 | } 73 | } 74 | return i; 75 | } 76 | 77 | template 78 | __global__ 79 | void split_kernel(int nsegment, 80 | int const* __restrict__ segment_offsets, 81 | T const* __restrict__ idata, 82 | T* const* odatas, 83 | int nx, 84 | int src_ny, 85 | int nz) { 86 | int x0 = threadIdx.x + blockIdx.x * blockDim.x; 87 | int src_y0 = threadIdx.y + blockIdx.y * blockDim.y; 88 | int z0 = threadIdx.z + blockIdx.z * blockDim.z; 89 | for( int z=z0; zgetInputDims(0); 106 | int const* d_segment_offsets_ptr = 107 | thrust::raw_pointer_cast(&_d_segment_offsets[0]); 108 | float const* idata = reinterpret_cast(inputs[0]); 109 | float* const* h_odatas = reinterpret_cast(outputs); 110 | float** odatas = thrust::raw_pointer_cast(&_d_output_ptrs[0]); 111 | cudaError_t cuda_status = 112 | cudaMemcpyAsync(odatas, h_odatas, 113 | _d_output_ptrs.size() * sizeof(float*), 114 | cudaMemcpyHostToDevice, stream); 115 | if( cuda_status != cudaSuccess ) { 116 | return 1; 117 | } 118 | int nz = _nz * batchSize; 119 | dim3 block(32, 16); 120 | dim3 grid(std::min((_nx - 1) / block.x + 1, 65535u), 121 | std::min((_ny - 1) / block.y + 1, 65535u), 122 | std::min((_nz - 1) / block.z + 1, 65535u)); 123 | if (getDataType()==nvinfer1::DataType::kFLOAT) { 124 | split_kernel<<>> 125 | (_d_segment_offsets.size(), d_segment_offsets_ptr, idata, odatas, 126 | _nx, _ny, nz); 127 | } else { 128 | split_kernel<<>> 129 | (_d_segment_offsets.size(), d_segment_offsets_ptr, (__half const*)idata, (__half**)odatas, 130 | _nx, _ny, nz); 131 | } 132 | return cudaGetLastError() != cudaSuccess; 133 | } 134 | -------------------------------------------------------------------------------- /src/onnx_parser/Split.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | #include <NvInfer.h> 25 | 26 | #include "plugin.hpp" 27 | #include "serialize.hpp" 28 | 29 | #include <thrust/device_vector.h> 30 | #include <cassert> 31 | 32 | namespace { 33 | constexpr const char* SPLIT_PLUGIN_VERSION{"001"}; 34 | constexpr const char* SPLIT_PLUGIN_NAME{"Split"}; 35 | } 36 | 37 | class SplitPlugin final : public onnx2trt::PluginV2 { 38 | int _axis; 39 | std::vector<int> _output_lengths; 40 | int _nx, _ny, _nz; 41 | int _x_stride, _y_stride, _z_stride; 42 | thrust::device_vector<int> _d_segment_offsets; 43 | thrust::device_vector<float*> _d_output_ptrs; 44 | protected: 45 | void deserialize(void const* serialData, size_t serialLength) { 46 | deserializeBase(serialData, serialLength); 47 | deserialize_value(&serialData, &serialLength, &_axis); 48 | deserialize_value(&serialData, &serialLength, &_output_lengths); 49 | } 50 | virtual size_t getSerializationSize() const override { 51 | return serialized_size(_axis) + serialized_size(_output_lengths) 52 | + getBaseSerializationSize(); 53 | } 54 | virtual void serialize(void *buffer) const override { 55 | serializeBase(buffer); 56 | serialize_value(&buffer, _axis); 57 | serialize_value(&buffer, _output_lengths); 58 | } 59 | public: 60 | SplitPlugin(int axis, std::vector<int> const& output_lengths) 61 | : _axis(axis), _output_lengths(output_lengths) { 62 | assert(axis <= nvinfer1::Dims::MAX_DIMS); 63 | } 64 | SplitPlugin(void const* serialData, size_t serialLength) { 65 | this->deserialize(serialData, serialLength); 66 | } 67 | virtual const char* getPluginType() const override { return SPLIT_PLUGIN_NAME; } 68 | 69 | virtual void destroy() override { delete this; } 70 | 71 | virtual nvinfer1::IPluginV2* clone() const override { return new SplitPlugin{_axis, _output_lengths}; } 72 | 73 | virtual const char* getPluginVersion() const override { return SPLIT_PLUGIN_VERSION; } 74 | 75 | virtual void setPluginNamespace(const char* pluginNamespace) override {} 76 | 77 | virtual const char* getPluginNamespace() const override { return ""; } 78 | 79 | virtual int getNbOutputs() const override { return _output_lengths.size(); } 80 | virtual nvinfer1::Dims getOutputDimensions(int index, 81 | const nvinfer1::Dims *inputs, int nbInputDims) override; 82 | virtual int initialize() override; 83 | virtual int enqueue(int batchSize, 84 | const void *const *inputs, void **outputs, 85 | void *workspace, cudaStream_t stream) override; 86 | }; 87 | 88 | class
SplitPluginCreator : public nvinfer1::IPluginCreator 89 | { 90 | public: 91 | SplitPluginCreator() {} 92 | 93 | ~SplitPluginCreator() {} 94 | 95 | const char* getPluginName() const { return SPLIT_PLUGIN_NAME; } 96 | 97 | const char* getPluginVersion() const { return SPLIT_PLUGIN_VERSION; } 98 | 99 | const nvinfer1::PluginFieldCollection* getFieldNames() { std::cerr<< "Function not implemented" << std::endl; return nullptr; } 100 | 101 | nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) { std::cerr<< "Function not implemented" << std::endl; return nullptr; } 102 | 103 | nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { return new SplitPlugin{serialData, serialLength}; } 104 | 105 | void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; } 106 | 107 | const char* getPluginNamespace() const { return mNamespace.c_str(); } 108 | private: 109 | std::string mNamespace; 110 | }; 111 | 112 | REGISTER_TENSORRT_PLUGIN(SplitPluginCreator); 113 | -------------------------------------------------------------------------------- /src/onnx_parser/Status.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #pragma once 24 | 25 | #include "NvOnnxParser.h" 26 | 27 | #include 28 | #include 29 | 30 | #define MAKE_ERROR(desc, code) \ 31 | Status((code), (desc), __FILE__, __LINE__, __func__) 32 | 33 | #define ASSERT(condition, error_code) do { \ 34 | if( !(condition) ) { \ 35 | return MAKE_ERROR("Assertion failed: " #condition, (error_code)); \ 36 | } \ 37 | } while(0) 38 | 39 | #define MAKE_INPUT_ERROR(desc, code, name) \ 40 | Status((code), (desc), name, __LINE__, __func__) 41 | 42 | #define ASSERT_INPUT(condition, error_code, name) do {\ 43 | if ( !(condition) ) {\ 44 | return MAKE_INPUT_ERROR("Assertion failed: " #condition, (error_code), (name)); \ 45 | } \ 46 | } while(0) 47 | 48 | #define ASSERT_C(condition, error_code) do { \ 49 | if( !(condition) ) { \ 50 | return error_code; \ 51 | } \ 52 | } while(0) 53 | 54 | #define GET_VALUE(value_or_error_, result_ptr) do { \ 55 | auto const& value_or_error = value_or_error_; \ 56 | if( value_or_error.is_error() ) { \ 57 | return value_or_error.error(); \ 58 | } else { \ 59 | *result_ptr = value_or_error.value(); \ 60 | } \ 61 | } while(0) 62 | 63 | #define TRT_CHECK(call) do { \ 64 | Status status = call; \ 65 | if( !status.is_success() ) { \ 66 | return status; \ 67 | } \ 68 | } while(0) 69 | 70 | namespace onnx2trt { 71 | 72 | using nvonnxparser::ErrorCode; 73 | 74 | class Status : public nvonnxparser::IParserError { 75 | ErrorCode _code; 76 | std::string _desc; 77 | std::string _file; 78 | int _line; 79 | std::string _func; 80 | int _node; 81 | public: 82 | static Status success() { return Status(ErrorCode::kSUCCESS); } 83 | Status() {} 84 | explicit Status(ErrorCode code, std::string desc="", 85 | std::string file="", int line=0, std::string func="", 86 | int node=-1) 87 | : _code(code), _desc(desc), _file(file), _line(line), _func(func), 88 | _node(node) {} 89 | ErrorCode code() const override { return _code; } 90 | const char* desc() const override { return _desc.c_str(); } 91 | const char* file() const override { return _file.c_str(); } 92 | int line() const override { return _line; } 93 | const char* func() const override { return _func.c_str(); } 94 | int node() const override { return _node; } 95 | bool is_error() const { return _code != ErrorCode::kSUCCESS; } 96 | bool is_success() const { return _code == ErrorCode::kSUCCESS; } 97 | void setNode(int node) { _node = node; } 98 | }; 99 | 100 | template 101 | class ValueOrStatus { 102 | bool _is_error; 103 | T _value; 104 | Status _error; 105 | public: 106 | ValueOrStatus(T const& value) : _is_error(false), _value(value), _error(Status::success()) {} 107 | ValueOrStatus(T&& value) : _is_error(false), _value(value), _error(Status::success()) {} 108 | ValueOrStatus(Status const& error) : _is_error(true), _error(error) {} 109 | ValueOrStatus(Status&& error) : _is_error(true), _error(error) {} 110 | bool is_error() const { return _is_error; } 111 | T const& value() const { assert(!_is_error); return _value; } 112 | T& value() { assert(!_is_error); return _value; } 113 | Status const& error() const { assert( _is_error); return _error; } 114 | }; 115 | 116 | } // namespace onnx2trt 117 | -------------------------------------------------------------------------------- /src/onnx_parser/TensorOrWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include "ShapedWeights.hpp" 26 | 27 | #include 28 | #include 29 | 30 | namespace onnx2trt { 31 | 32 | class TensorOrWeights { 33 | union { 34 | nvinfer1::ITensor* _tensor; 35 | ShapedWeights _weights; 36 | }; 37 | enum { NODE_TENSOR, NODE_WEIGHTS } _variant; 38 | public: 39 | inline TensorOrWeights() : _tensor(nullptr), _variant(NODE_TENSOR) {} 40 | inline TensorOrWeights(nvinfer1::ITensor* tensor) 41 | : _tensor(tensor), _variant(NODE_TENSOR) {} 42 | inline TensorOrWeights(ShapedWeights const& weights) 43 | : _weights(weights), _variant(NODE_WEIGHTS) {} 44 | inline bool is_tensor() const { return _variant == NODE_TENSOR; } 45 | inline bool is_weights() const { return _variant == NODE_WEIGHTS; } 46 | inline nvinfer1::ITensor& tensor() { 47 | assert(this->is_tensor()); 48 | return *_tensor; 49 | } 50 | inline nvinfer1::ITensor const& tensor() const { 51 | assert(this->is_tensor()); 52 | return *_tensor; 53 | } 54 | inline ShapedWeights& weights() { 55 | assert(this->is_weights()); 56 | return _weights; 57 | } 58 | inline ShapedWeights const& weights() const { 59 | assert(this->is_weights()); 60 | return _weights; 61 | } 62 | inline nvinfer1::Dims shape() const { 63 | return this->is_tensor() ? _tensor->getDimensions() : _weights.shape; 64 | } 65 | inline operator bool() const { 66 | return this->is_tensor() ? (bool)_tensor : (bool)_weights; 67 | } 68 | nvinfer1::ITensor* reset_tensor(nvinfer1::ITensor* tensor) { 69 | assert(this->is_tensor()); 70 | return _tensor = tensor; 71 | } 72 | }; 73 | 74 | } // namespace onnx2trt 75 | -------------------------------------------------------------------------------- /src/onnx_parser/builtin_op_importers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include "onnx2trt.hpp" 26 | #include "utils.hpp" 27 | 28 | namespace onnx2trt { 29 | 30 | string_map& getBuiltinOpImporterMap(); 31 | 32 | } // namespace onnx2trt 33 | -------------------------------------------------------------------------------- /src/onnx_parser/common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include // For ::open 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #ifdef _WIN32 36 | #include 37 | #endif 38 | 39 | // Namespace for common functions used throughout onnx-trt 40 | namespace common 41 | { 42 | struct InferDeleter { 43 | template 44 | void operator()(T* obj) const { 45 | if( obj ) { 46 | obj->destroy(); 47 | } 48 | } 49 | }; 50 | 51 | template 52 | inline std::shared_ptr infer_object(T* obj) { 53 | if( !obj ) { 54 | throw std::runtime_error("Failed to create object"); 55 | } 56 | return std::shared_ptr(obj, InferDeleter()); 57 | } 58 | 59 | // Logger for TensorRT info/warning/errors 60 | class TRT_Logger : public nvinfer1::ILogger { 61 | nvinfer1::ILogger::Severity _verbosity; 62 | std::ostream* _ostream; 63 | public: 64 | TRT_Logger(Severity verbosity=Severity::kWARNING, 65 | std::ostream& ostream=std::cout) 66 | : _verbosity(verbosity), _ostream(&ostream) {} 67 | void log(Severity severity, const char* msg) override { 68 | if( severity <= _verbosity ) { 69 | time_t rawtime = std::time(0); 70 | char buf[256]; 71 | strftime(&buf[0], 256, 72 | "%Y-%m-%d %H:%M:%S", 73 | std::gmtime(&rawtime)); 74 | const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : 75 | severity == Severity::kERROR ? " ERROR" : 76 | severity == Severity::kWARNING ? "WARNING" : 77 | severity == Severity::kINFO ? " INFO" : 78 | "UNKNOWN"); 79 | (*_ostream) << "[" << buf << " " << sevstr << "] " 80 | << msg 81 | << std::endl; 82 | } 83 | } 84 | }; 85 | 86 | inline bool ParseFromFile_WAR(google::protobuf::Message* msg, 87 | const char* filename) { 88 | int fd = ::open(filename, O_RDONLY); 89 | google::protobuf::io::FileInputStream raw_input(fd); 90 | raw_input.SetCloseOnDelete(true); 91 | google::protobuf::io::CodedInputStream coded_input(&raw_input); 92 | // Note: This WARs the very low default size limit (64MB) 93 | coded_input.SetTotalBytesLimit(std::numeric_limits::max(), 94 | std::numeric_limits::max()/4); 95 | return msg->ParseFromCodedStream(&coded_input); 96 | } 97 | 98 | inline bool ParseFromTextFile(google::protobuf::Message* msg, 99 | const char* filename) { 100 | int fd = ::open(filename, O_RDONLY); 101 | google::protobuf::io::FileInputStream raw_input(fd); 102 | raw_input.SetCloseOnDelete(true); 103 | return google::protobuf::TextFormat::Parse(&raw_input, msg); 104 | } 105 | 106 | inline std::string onnx_ir_version_string(int64_t ir_version=::ONNX_NAMESPACE::IR_VERSION) { 107 | int onnx_ir_major = ir_version / 1000000; 108 | int onnx_ir_minor = ir_version % 1000000 / 10000; 109 | int onnx_ir_patch = ir_version % 10000; 110 | return (std::to_string(onnx_ir_major) + "." + 111 | std::to_string(onnx_ir_minor) + "." + 112 | std::to_string(onnx_ir_patch)); 113 | } 114 | 115 | inline void print_version() { 116 | std::cout << "Parser built against:" << std::endl; 117 | std::cout << " ONNX IR version: " << onnx_ir_version_string(::ONNX_NAMESPACE::IR_VERSION) << std::endl; 118 | std::cout << " TensorRT version: " 119 | << NV_TENSORRT_MAJOR << "." 120 | << NV_TENSORRT_MINOR << "." 121 | << NV_TENSORRT_PATCH << std::endl; 122 | } 123 | } // namespace common -------------------------------------------------------------------------------- /src/onnx_parser/onnx2trt.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include "NvOnnxParser.h" 26 | #include "ShapedWeights.hpp" 27 | #include "TensorOrWeights.hpp" 28 | #include "Status.hpp" 29 | #include "plugin.hpp" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | namespace onnx2trt { 37 | 38 | class IImporterContext; 39 | 40 | // TODO: Find ABI-safe alternative approach for this: 41 | // Can't use std::vector 42 | // Can't use ::onnx::NodeProto 43 | // Can't use std::function 44 | typedef ValueOrStatus> NodeImportResult; 45 | typedef std::function &inputs)> 48 | NodeImporter; 49 | 50 | struct IImporterContext { 51 | virtual nvinfer1::INetworkDefinition* network() = 0; 52 | virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, 53 | nvinfer1::Dims shape) = 0; 54 | virtual nvinfer1::IPluginV2Layer* addPluginV2( 55 | PluginV2* plugin, 56 | std::vector const& inputs) = 0; 57 | virtual int64_t getOpsetVersion(const char* domain="") const = 0; 58 | protected: 59 | virtual ~IImporterContext() {} 60 | }; 61 | 62 | } // namespace onnx2trt 63 | -------------------------------------------------------------------------------- /src/onnx_parser/onnx2trt_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | 28 | # if NV_TENSORRT_MAJOR < 4 29 | namespace nvinfer1 { 30 | 31 | enum class PluginFormat: uint8_t 32 | { 33 | kNCHW = 0, //!< NCHW 34 | kNC2HW2 = 1, //!< NCHW with 2-element packed channels 35 | kNHWC8 = 2 //!< NHWC with 8-element packed channels (C 36 | //!must be a multiple of 8) 37 | }; 38 | // from NvInfer.h 39 | class IPluginExt : public IPlugin 40 | { 41 | public: 42 | virtual int getTensorRTVersion() const 43 | { 44 | return NV_TENSORRT_VERSION; 45 | } 46 | virtual bool supportsFormat(DataType type, PluginFormat format) const = 0; 47 | virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0; 48 | 49 | protected: 50 | void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) final { 51 | DataType type = nvinfer1::DataType::kFLOAT; 52 | PluginFormat format = nvinfer1::PluginFormat::kNCHW; 53 | return this->configureWithFormat( 54 | inputDims, nbInputs, outputDims, nbOutputs, type, format, maxBatchSize); 55 | } 56 | virtual ~IPluginExt() {} 57 | }; 58 | 59 | } // namespace nvinfer1 60 | #endif 61 | 62 | namespace onnx2trt { 63 | 64 | struct IOwnable { 65 | virtual void destroy() = 0; 66 | protected: 67 | virtual ~IOwnable() {} 68 | }; 69 | 70 | struct OwnableDeleter { 71 | void operator()(IOwnable* obj) const { 72 | obj->destroy(); 73 | } 74 | }; 75 | 76 | using UniqueOwnable = std::unique_ptr; 77 | class Plugin; 78 | 79 | } // namespace onnx2trt 80 | -------------------------------------------------------------------------------- /src/onnx_parser/onnx2trt_utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
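// ---- Editor's illustrative sketch (not part of the original repository) ----------------
// Demonstrates the ownership contract defined by IOwnable/OwnableDeleter above: the
// destructor is protected, so client code can only release an object through destroy(),
// which UniqueOwnable calls automatically. DemoOwnable is an assumed type introduced
// purely for this example.
struct DemoOwnable : public onnx2trt::IOwnable {
    void destroy() override { delete this; }   // the only sanctioned way to free the object
};
static void example_ownership() {
    onnx2trt::UniqueOwnable owned(new DemoOwnable());
    // When 'owned' goes out of scope, OwnableDeleter invokes destroy() instead of delete.
}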
21 | */ 22 | 23 | #include "onnx2trt_utils.hpp" 24 | 25 | namespace onnx2trt { 26 | 27 | void setAttr(nvinfer1::Dims * trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int nbSpatialDims, int defaultVal){ 28 | assert(trtAttr->nbDims == nbSpatialDims); 29 | int ndim = onnxAttr->ints().size(); 30 | for(int i = 0; i < nbSpatialDims; ++i){ 31 | if(i < ndim){ 32 | trtAttr->d[i] = onnxAttr->ints(i); 33 | } else { 34 | trtAttr->d[i] = defaultVal; 35 | } 36 | } 37 | } 38 | 39 | void get_kernel_params(::ONNX_NAMESPACE::NodeProto const& onnx_node, 40 | nvinfer1::Dims* kernel_size, 41 | nvinfer1::Dims* strides, 42 | nvinfer1::Dims* beg_padding, 43 | nvinfer1::Dims* end_padding, 44 | nvinfer1::PaddingMode& paddingMode, 45 | nvinfer1::Dims* dilations) { 46 | const int nbSpatialDims = kernel_size->nbDims; 47 | OnnxAttrs attrs(onnx_node); 48 | if( attrs.count("kernel_shape") ) { 49 | auto const* onnx_kernel_size = attrs.at("kernel_shape"); 50 | setAttr(kernel_size, onnx_kernel_size, nbSpatialDims, 1); 51 | } 52 | if( attrs.count("strides") ) { 53 | auto const* onnx_strides = attrs.at("strides"); 54 | setAttr(strides, onnx_strides, nbSpatialDims, 1); 55 | } 56 | if( dilations && attrs.count("dilations") ) { 57 | auto const* onnx_dilations = attrs.at("dilations"); 58 | setAttr(dilations, onnx_dilations, nbSpatialDims, 1); 59 | } 60 | paddingMode = nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN; 61 | auto onnx_auto_pad = attrs.get("auto_pad", std::string("NOTSET")); 62 | if( onnx_auto_pad == "VALID" || onnx_auto_pad == "NOTSET" ) { 63 | if( attrs.count("pads") ) { 64 | auto onnx_padding = attrs.get>("pads"); 65 | int ndim = onnx_padding.size() / 2; 66 | for(int i = 0; i < nbSpatialDims; ++i){ 67 | if(i < ndim){ 68 | beg_padding->d[i] = onnx_padding.at(i); 69 | end_padding->d[i] = onnx_padding.at(i + ndim); 70 | } else { 71 | beg_padding->d[i] = 0; 72 | end_padding->d[i] = 0; 73 | } 74 | } 75 | } 76 | } else { // SAME_* padding 77 | assert(!attrs.count("pads")); 78 | // Note: ONNX is always NCHW ordering 79 | if( onnx_auto_pad == "SAME_LOWER" ) { 80 | paddingMode = nvinfer1::PaddingMode::kSAME_LOWER; 81 | } else if( onnx_auto_pad == "SAME_UPPER" ) { 82 | paddingMode = nvinfer1::PaddingMode::kSAME_UPPER; 83 | } else { 84 | throw std::invalid_argument("Unexpected auto_pad value: " + 85 | onnx_auto_pad); 86 | } 87 | } 88 | } 89 | 90 | } // namespace onnx2trt 91 | -------------------------------------------------------------------------------- /src/onnx_parser/onnx_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace { 28 | 29 | // Removes raw data from the text representation of an ONNX model 30 | inline void remove_raw_data_strings(std::string& s) { 31 | std::string::size_type beg = 0; 32 | const std::string key = "raw_data: \""; 33 | const std::string sub = "..."; 34 | while( (beg = s.find(key, beg)) != std::string::npos ) { 35 | beg += key.length(); 36 | std::string::size_type end = beg - 1; 37 | // Note: Must skip over escaped end-quotes 38 | while( s[(end = s.find("\"", ++end)) - 1] == '\\' ) {} 39 | if( end - beg > 128 ) { // Only remove large data strings 40 | s.replace(beg, end - beg, "..."); 41 | } 42 | beg += sub.length(); 43 | } 44 | } 45 | 46 | // Removes float_data, int32_data etc. from the text representation of an ONNX model 47 | inline std::string remove_repeated_data_strings(std::string& s) { 48 | std::istringstream iss(s); 49 | std::ostringstream oss; 50 | bool is_repeat = false; 51 | for( std::string line; std::getline(iss, line); ) { 52 | if( line.find("float_data:") != std::string::npos || 53 | line.find("int32_data:") != std::string::npos || 54 | line.find("int64_data:") != std::string::npos ) { 55 | if( !is_repeat ) { 56 | is_repeat = true; 57 | oss << line.substr(0, line.find(":") + 1) << " ...\n"; 58 | } 59 | } else { 60 | is_repeat = false; 61 | oss << line << "\n"; 62 | } 63 | } 64 | return oss.str(); 65 | } 66 | 67 | } // anonymous namespace 68 | 69 | inline std::string pretty_print_onnx_to_string(::google::protobuf::Message const& message) { 70 | std::string s; 71 | ::google::protobuf::TextFormat::PrintToString(message, &s); 72 | remove_raw_data_strings(s); 73 | s = remove_repeated_data_strings(s); 74 | return s; 75 | } 76 | 77 | inline std::ostream& operator<<(std::ostream& stream, ::ONNX_NAMESPACE::ModelProto const& message) { 78 | stream << pretty_print_onnx_to_string(message); 79 | return stream; 80 | } 81 | 82 | inline std::ostream& operator<<(std::ostream& stream, ::ONNX_NAMESPACE::NodeProto const& message) { 83 | stream << pretty_print_onnx_to_string(message); 84 | return stream; 85 | } 86 | -------------------------------------------------------------------------------- /src/onnx_parser/plugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "plugin.hpp" 24 | #include "serialize.hpp" 25 | #include 26 | 27 | namespace onnx2trt { 28 | 29 | // ========================= Plugin ===================== 30 | 31 | void Plugin::serializeBase(void*& buffer) { 32 | serialize_value(&buffer, _input_dims); 33 | serialize_value(&buffer, _max_batch_size); 34 | serialize_value(&buffer, _data_type); 35 | serialize_value(&buffer, _data_format); 36 | } 37 | 38 | void Plugin::deserializeBase(void const*& serialData, size_t& serialLength) { 39 | deserialize_value(&serialData, &serialLength, &_input_dims); 40 | deserialize_value(&serialData, &serialLength, &_max_batch_size); 41 | deserialize_value(&serialData, &serialLength, &_data_type); 42 | deserialize_value(&serialData, &serialLength, &_data_format); 43 | } 44 | 45 | size_t Plugin::getBaseSerializationSize() { 46 | return (serialized_size(_input_dims) + 47 | serialized_size(_max_batch_size) + 48 | serialized_size(_data_type) + 49 | serialized_size(_data_format)); 50 | } 51 | 52 | bool Plugin::supportsFormat(nvinfer1::DataType type, 53 | nvinfer1::PluginFormat format) const { 54 | return ((type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF) && 55 | (format == nvinfer1::PluginFormat::kNCHW)); 56 | } 57 | 58 | void Plugin::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, 59 | const nvinfer1::Dims* outputDims, int nbOutputs, 60 | nvinfer1::DataType type, 61 | nvinfer1::PluginFormat format, 62 | int maxBatchSize) { 63 | _data_type = type; 64 | _data_format = format; 65 | _input_dims.assign(inputDims, inputDims + nbInputs); 66 | _max_batch_size = maxBatchSize; 67 | } 68 | 69 | // ========================= PluginV2 ===================== 70 | 71 | void PluginV2::serializeBase(void*& buffer) const { 72 | serialize_value(&buffer, _input_dims); 73 | serialize_value(&buffer, _max_batch_size); 74 | serialize_value(&buffer, _data_type); 75 | serialize_value(&buffer, _data_format); 76 | } 77 | 78 | void PluginV2::deserializeBase(void const*& serialData, size_t& serialLength) { 79 | deserialize_value(&serialData, &serialLength, &_input_dims); 80 | deserialize_value(&serialData, &serialLength, &_max_batch_size); 81 | deserialize_value(&serialData, &serialLength, &_data_type); 82 | deserialize_value(&serialData, &serialLength, &_data_format); 83 | } 84 | 85 | size_t PluginV2::getBaseSerializationSize() const { 86 | return (serialized_size(_input_dims) + 87 | serialized_size(_max_batch_size) + 88 | serialized_size(_data_type) + 89 | serialized_size(_data_format)); 90 | } 91 | 92 | bool PluginV2::supportsFormat(nvinfer1::DataType type, 93 | nvinfer1::PluginFormat format) const { 94 | return ((type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF) && 95 | (format == nvinfer1::PluginFormat::kNCHW)); 96 | } 97 | 98 | void PluginV2::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, 99 | const nvinfer1::Dims* outputDims, int nbOutputs, 100 | nvinfer1::DataType type, 101 | nvinfer1::PluginFormat format, 102 | int maxBatchSize) { 103 | _data_type = type; 104 | _data_format = format; 105 | _input_dims.assign(inputDims, inputDims + nbInputs); 106 | _max_batch_size = maxBatchSize; 107 | } 108 | 109 | // ========================= PluginAdapter 
===================== 110 | 111 | int PluginAdapter::getNbOutputs() const { 112 | return _pluginV2->getNbOutputs(); 113 | } 114 | nvinfer1::Dims PluginAdapter::getOutputDimensions(int index, 115 | const nvinfer1::Dims *inputDims, 116 | int nbInputs) { 117 | return _pluginV2->getOutputDimensions(index, inputDims, nbInputs); 118 | } 119 | void PluginAdapter::serialize(void* buffer) const { 120 | return _pluginV2->serialize(buffer); 121 | } 122 | size_t PluginAdapter::getSerializationSize() const { 123 | return _pluginV2->getSerializationSize(); 124 | } 125 | bool PluginAdapter::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const 126 | { 127 | return _pluginV2->supportsFormat(type, format); 128 | } 129 | void PluginAdapter::configureWithFormat(const nvinfer1::Dims *inputDims, int nbInputs, 130 | const nvinfer1::Dims *outputDims, int nbOutputs, 131 | nvinfer1::DataType type, 132 | nvinfer1::PluginFormat format, 133 | int maxBatchSize) { 134 | return _pluginV2->configureWithFormat(inputDims, nbInputs, 135 | outputDims, nbOutputs, 136 | type, format, maxBatchSize); 137 | } 138 | size_t PluginAdapter::getWorkspaceSize(int maxBatchSize) const { 139 | return _pluginV2->getWorkspaceSize(maxBatchSize); 140 | } 141 | int PluginAdapter::initialize() { return _pluginV2->initialize(); } 142 | 143 | void PluginAdapter::terminate() { 144 | if (_pluginV2) { 145 | _pluginV2->terminate(); 146 | } 147 | } 148 | 149 | void PluginAdapter::destroy() { 150 | return _pluginV2->destroy(); 151 | } 152 | 153 | nvinfer1::IPluginV2* PluginAdapter::clone() const { 154 | return _pluginV2->clone(); 155 | } 156 | 157 | const char* PluginAdapter::getPluginVersion() const { 158 | return _pluginV2->getPluginVersion(); 159 | } 160 | 161 | void PluginAdapter::setPluginNamespace(const char* pluginNamespace) { 162 | return _pluginV2->setPluginNamespace(pluginNamespace); 163 | } 164 | 165 | const char* PluginAdapter::getPluginNamespace() const { 166 | return _pluginV2->getPluginNamespace(); 167 | } 168 | 169 | int PluginAdapter::enqueue(int batchSize, 170 | const void *const *inputs, void **outputs, 171 | void *workspace, cudaStream_t stream) { 172 | return _pluginV2->enqueue(batchSize, inputs, outputs, workspace, stream); 173 | } 174 | 175 | } // namespace onnx2trt 176 | -------------------------------------------------------------------------------- /src/onnx_parser/plugin_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | #pragma once 23 | 24 | namespace onnx2trt { 25 | static const char REGISTERABLE_PLUGIN_MAGIC_STRING[] = "Pv01"; 26 | } // namespace onnx2trt 27 | 28 | -------------------------------------------------------------------------------- /src/onnx_parser/serialize.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
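// ---- Editor's illustrative sketch (not part of the original repository) ----------------
// Ties together two helpers shown earlier in this listing: ParseFromFile_WAR from
// common.hpp and the ModelProto stream operator from onnx_utils.hpp, which prints a model
// with large raw_data/float_data blobs elided. It assumes the ONNX protobuf headers and
// those two files are included; the model path reuses workspace/models/demo.onnx.
#include <iostream>
static void example_dump_model() {
    ::ONNX_NAMESPACE::ModelProto model;
    if (common::ParseFromFile_WAR(&model, "models/demo.onnx")) {
        std::cout << model;   // pretty-printed, weight blobs replaced by "..."
    }
}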
21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | using std::cout; 32 | using std::cerr; 33 | using std::endl; 34 | 35 | template 36 | inline void serialize_value(void** buffer, T const& value); 37 | 38 | template 39 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value); 40 | 41 | namespace { 42 | 43 | template 44 | struct Serializer {}; 45 | 46 | template 47 | struct Serializer::value || 49 | std::is_enum::value || 50 | std::is_pod::value>::type> { 51 | static size_t serialized_size(T const& value) { 52 | return sizeof(T); 53 | } 54 | static void serialize(void** buffer, T const& value) { 55 | ::memcpy(*buffer, &value, sizeof(T)); 56 | reinterpret_cast(*buffer) += sizeof(T); 57 | } 58 | static void deserialize(void const** buffer, size_t* buffer_size, T* value) { 59 | assert(*buffer_size >= sizeof(T)); 60 | ::memcpy(value, *buffer, sizeof(T)); 61 | reinterpret_cast(*buffer) += sizeof(T); 62 | *buffer_size -= sizeof(T); 63 | } 64 | }; 65 | 66 | template<> 67 | struct Serializer { 68 | static size_t serialized_size(const char* value) { 69 | return strlen(value) + 1; 70 | } 71 | static void serialize(void** buffer, const char* value) { 72 | ::strcpy(static_cast(*buffer), value); 73 | reinterpret_cast(*buffer) += strlen(value) + 1; 74 | } 75 | static void deserialize(void const** buffer, size_t* buffer_size, const char** value) { 76 | *value = static_cast(*buffer); 77 | size_t data_size = strnlen(*value, *buffer_size) + 1; 78 | assert(*buffer_size >= data_size); 79 | reinterpret_cast(*buffer) += data_size; 80 | *buffer_size -= data_size; 81 | } 82 | }; 83 | 84 | template 85 | struct Serializer, typename std::enable_if< 86 | std::is_arithmetic::value || 87 | std::is_enum::value || 88 | std::is_pod::value>::type> { 89 | static size_t serialized_size(std::vector const& value) { 90 | return sizeof(value.size()) + value.size() * sizeof(T); 91 | } 92 | static void serialize(void** buffer, std::vector const& value) { 93 | serialize_value(buffer, value.size()); 94 | size_t nbyte = value.size() * sizeof(T); 95 | ::memcpy(*buffer, value.data(), nbyte); 96 | reinterpret_cast(*buffer) += nbyte; 97 | } 98 | static void deserialize(void const** buffer, size_t* buffer_size, std::vector* value) { 99 | size_t size; 100 | deserialize_value(buffer, buffer_size, &size); 101 | value->resize(size); 102 | size_t nbyte = value->size() * sizeof(T); 103 | assert(*buffer_size >= nbyte); 104 | ::memcpy(value->data(), *buffer, nbyte); 105 | reinterpret_cast(*buffer) += nbyte; 106 | *buffer_size -= nbyte; 107 | } 108 | }; 109 | 110 | } // namespace 111 | 112 | template 113 | inline size_t serialized_size(T const& value) { 114 | return Serializer::serialized_size(value); 115 | } 116 | 117 | template 118 | inline void serialize_value(void** buffer, T const& value) { 119 | return Serializer::serialize(buffer, value); 120 | } 121 | 122 | template 123 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) { 124 | return Serializer::deserialize(buffer, buffer_size, value); 125 | } 126 | -------------------------------------------------------------------------------- /src/onnx_parser/toposort.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
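// ---- Editor's illustrative sketch (not part of the original repository) ----------------
// Round-trips a scalar and a vector through the serialize_value / deserialize_value
// helpers defined above, the same pattern the plugins use for their serialized payloads.
// Field names and values are assumptions made only for this example.
#include <cassert>
#include <vector>
static void example_serialize_roundtrip() {
    int max_batch = 4;
    std::vector<float> scales = {1.0f, 0.5f};

    std::vector<char> storage(serialized_size(max_batch) + serialized_size(scales));
    void* wptr = storage.data();
    serialize_value(&wptr, max_batch);
    serialize_value(&wptr, scales);

    int max_batch_out = 0;
    std::vector<float> scales_out;
    const void* rptr = storage.data();
    size_t remaining = storage.size();
    deserialize_value(&rptr, &remaining, &max_batch_out);
    deserialize_value(&rptr, &remaining, &scales_out);
    assert(max_batch_out == max_batch && scales_out == scales && remaining == 0);
}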
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | using std::cout; 30 | using std::cerr; 31 | using std::endl; 32 | 33 | namespace { 34 | 35 | enum NodeState { NODE_UNVISITED, NODE_ACTIVE, NODE_VISITED }; 36 | 37 | template 38 | bool get_post_order(size_t node_idx, 39 | Container const& nodes, 40 | std::unordered_map const& node_map, 41 | std::vector* node_states, 42 | std::vector* order) { 43 | NodeState& node_state = node_states->at(node_idx); 44 | if( node_state == NODE_ACTIVE ) { 45 | // Cycle detected! 46 | cerr << "ERROR: Graph contains a cycle" << endl; 47 | return false; 48 | } else if( node_state == NODE_VISITED ) { 49 | return true; 50 | } else { 51 | node_state = NODE_ACTIVE; 52 | // TODO: This .Get().input() is highly specific to protobuf, should 53 | // generalise it somehow. 54 | for( auto const& input : nodes.Get(node_idx).input() ) { 55 | if( !node_map.count(input) ) { 56 | // Input node not found in graph! 57 | //cerr << "ERROR: Input node not found in graph: " 58 | // << input << endl; 59 | //return false; 60 | continue; // Skip missing input edges 61 | } 62 | size_t input_node_idx = node_map.at(input); 63 | if( !get_post_order(input_node_idx, nodes, node_map, node_states, order) ) { 64 | return false; 65 | } 66 | } 67 | node_state = NODE_VISITED; 68 | order->push_back(node_idx); 69 | } 70 | return true; 71 | } 72 | 73 | } // anonymous namespace 74 | 75 | template 76 | bool toposort(Container const& nodes, std::vector* order) { 77 | std::unordered_map node_map; 78 | for( size_t i=0; i<(size_t)nodes.size(); ++i ) { 79 | // TODO: This .Get().input() is highly specific to protobuf, should 80 | // generalise it somehow. 81 | for( auto const& output : nodes.Get(i).output() ) { 82 | if( !node_map.emplace(output, i).second ) { 83 | // Output name appears more than once in graph! 
84 | cerr << "ERROR: Output name is not unique: " 85 | << output << endl; 86 | return false; 87 | } 88 | } 89 | } 90 | order->reserve(nodes.size()); 91 | std::vector node_states(nodes.size(), NODE_UNVISITED); 92 | for( size_t i=0; i<(size_t)nodes.size(); ++i ) { 93 | if( !get_post_order(i, nodes, node_map, &node_states, order) ) { 94 | return false; 95 | } 96 | } 97 | return true; 98 | } 99 | -------------------------------------------------------------------------------- /src/onnx_parser/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | 27 | template 28 | using string_map = std::unordered_map; 29 | -------------------------------------------------------------------------------- /src/onnxplugin/plugins/DCNv2.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DCNv2_HPP 3 | #define DCNv2_HPP 4 | 5 | #include 6 | #include 7 | 8 | using namespace ONNXPlugin; 9 | 10 | class DCNv2 : public TRTPlugin { 11 | public: 12 | SetupPlugin(DCNv2); 13 | 14 | virtual int initialize() override; 15 | virtual void terminate() override; 16 | virtual std::shared_ptr config(const std::string& layerName) override; 17 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims); 18 | virtual size_t getWorkspaceSize(int maxBatchSize) const override; 19 | virtual int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 20 | private: 21 | cublasHandle_t cublasHandle_ = nullptr; 22 | }; 23 | 24 | #endif //DCNv2_HPP -------------------------------------------------------------------------------- /src/onnxplugin/plugins/HSigmoid.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "HSigmoid.hpp" 4 | 5 | typedef TRTInfer::halfloat halfloat; 6 | 7 | template 8 | __global__ void HSigmoidKernel(_T* input, _T* output, int edge); 9 | 10 | 11 | template<> 12 | __global__ void HSigmoidKernel(float* input, float* output, int edge) { 13 | 14 | KERNEL_POSITION; 15 | float x = input[position]; 16 | float a = x + 3; 17 | a = a < 0 ? 0 : (a >= 6 ? 
6 : a); 18 | output[position] = a / 6; 19 | } 20 | 21 | template<> 22 | __global__ void HSigmoidKernel(halfloat* input, halfloat* output, int edge) { 23 | 24 | KERNEL_POSITION; 25 | 26 | halfloat _six = 6.0f; 27 | halfloat x = input[position]; 28 | halfloat a = x + halfloat(3.0f); 29 | halfloat _zero = 0.0f; 30 | a = a < _zero ? _zero : (a >= _six ? _six : a); 31 | output[position] = a / _six; 32 | } 33 | 34 | void HSigmoidConfig::init(){ 35 | //INFO("init HSigmoid config: %s", info_.c_str()); 36 | //INFO("weights = %d", this->weights_.size()); 37 | } 38 | 39 | nvinfer1::Dims HSigmoid::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 40 | return inputDims[0]; 41 | } 42 | 43 | std::shared_ptr<LayerConfig> HSigmoid::config(const std::string& layerName) { 44 | auto cfg = std::shared_ptr<LayerConfig>(new HSigmoidConfig()); 45 | 46 | // declare that this plugin supports both half and float formats 47 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 48 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 49 | return cfg; 50 | } 51 | 52 | int HSigmoid::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 53 | 54 | int count = inputs[0].count(); 55 | auto grid = gridDims(count); 56 | auto block = blockDims(count); 57 | 58 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 59 | HSigmoidKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count); 60 | } 61 | else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 62 | HSigmoidKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count); 63 | } 64 | return 0; 65 | } 66 | 67 | RegisterPlugin(HSigmoid); -------------------------------------------------------------------------------- /src/onnxplugin/plugins/HSigmoid.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HSigmoid_HPP 3 | #define HSigmoid_HPP 4 | 5 | #include 6 | 7 | using namespace ONNXPlugin; 8 | 9 | class HSigmoidConfig : public LayerConfig{ 10 | public: 11 | virtual void init() override; 12 | }; 13 | 14 | class HSigmoid : public TRTPlugin { 15 | public: 16 | SetupPlugin(HSigmoid); 17 | 18 | virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override; 19 | 20 | // this plugin has only one output and its shape equals the shape of input 0, so return input0's shape 21 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override; 22 | 23 | // forward execution 24 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 25 | }; 26 | 27 | #endif //HSigmoid_HPP -------------------------------------------------------------------------------- /src/onnxplugin/plugins/HSwish.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "HSwish.hpp" 4 | 5 | typedef TRTInfer::halfloat halfloat; 6 | 7 | template<typename _T> 8 | __global__ void HSwishKernel(_T* input, _T* output, int edge); 9 | 10 | 11 | template<> 12 | __global__ void HSwishKernel(float* input, float* output, int edge) { 13 | 14 | KERNEL_POSITION; 15 | float x = input[position]; 16 | float a = x + 3; 17 | a = a < 0 ? 0 : (a >= 6 ? 6 : a); 18 | output[position] = x * a / 6; 19 | } 20 | 21 | template<> 22 | __global__ void HSwishKernel(halfloat* input, halfloat* output, int edge) { 23 | 24 | KERNEL_POSITION; 25 | 26 | halfloat _six = 6.0f; 27 | halfloat x = input[position]; 28 | halfloat a = x + halfloat(3.0f); 29 | halfloat _zero = 0.0f; 30 | a = a < _zero ? _zero : (a >= _six ?
_six : a); 31 | output[position] = x * a / _six; 32 | } 33 | 34 | void HSwishConfig::init(){ 35 | //INFO("init hswish config: %s", info_.c_str()); 36 | //INFO("weights = %d", this->weights_.size()); 37 | } 38 | 39 | nvinfer1::Dims HSwish::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 40 | return inputDims[0]; 41 | } 42 | 43 | std::shared_ptr<LayerConfig> HSwish::config(const std::string& layerName) { 44 | auto cfg = std::shared_ptr<LayerConfig>(new HSwishConfig()); 45 | 46 | // declare that this plugin supports both half and float formats 47 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 48 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 49 | return cfg; 50 | } 51 | 52 | int HSwish::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 53 | 54 | int count = inputs[0].count(); 55 | auto grid = gridDims(count); 56 | auto block = blockDims(count); 57 | 58 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 59 | HSwishKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count); 60 | } 61 | else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 62 | HSwishKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count); 63 | } 64 | return 0; 65 | } 66 | 67 | RegisterPlugin(HSwish); -------------------------------------------------------------------------------- /src/onnxplugin/plugins/HSwish.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HSwish_HPP 3 | #define HSwish_HPP 4 | 5 | #include 6 | 7 | using namespace ONNXPlugin; 8 | 9 | class HSwishConfig : public LayerConfig{ 10 | public: 11 | virtual void init() override; 12 | }; 13 | 14 | class HSwish : public TRTPlugin { 15 | public: 16 | SetupPlugin(HSwish); 17 | 18 | virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override; 19 | 20 | // this plugin has only one output and its shape equals the shape of input 0, so return input0's shape 21 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override; 22 | 23 | // forward execution 24 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 25 | }; 26 | 27 | #endif //HSwish_HPP -------------------------------------------------------------------------------- /src/onnxplugin/plugins/MReLU.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "MReLU.hpp" 4 | #include 5 | 6 | typedef TRTInfer::halfloat halfloat; 7 | 8 | template<typename _T> 9 | __global__ void MReLUKernel(_T* input, _T* output, _T bias, int edge); 10 | 11 | 12 | template<> 13 | __global__ void MReLUKernel(float* input, float* output, float bias, int edge) { 14 | 15 | KERNEL_POSITION; 16 | float x = input[position]; 17 | float a = x > 0 ? x : 0; 18 | output[position] = a + bias; 19 | } 20 | 21 | template<> 22 | __global__ void MReLUKernel(halfloat* input, halfloat* output, halfloat bias, int edge) { 23 | 24 | KERNEL_POSITION; 25 | 26 | halfloat x = input[position]; 27 | halfloat _zero = 0.0f; 28 | x = x > _zero ?
x : _zero; 29 | output[position] = x + bias; 30 | } 31 | 32 | void MReLUConfig::init(){ 33 | INFO("init MReLU config: %s", info_.c_str()); 34 | INFO("MReLU weights = %d[%s]", this->weights_.size(), this->weights_[0]->shapeString()); 35 | 36 | Json::Value value; 37 | if(Json::Reader().parse(info_, value)){ 38 | INFO("MReLU kernel_size: %d", value["kernel_size"].asInt()); 39 | INFO("MReLU eps: %g", value["eps"].asFloat()); 40 | INFO("MReLU other: %s", value["other"].asCString()); 41 | } 42 | } 43 | 44 | nvinfer1::Dims MReLU::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) { 45 | return inputDims[0]; 46 | } 47 | 48 | std::shared_ptr<LayerConfig> MReLU::config(const std::string& layerName) { 49 | auto cfg = std::shared_ptr<LayerConfig>(new MReLUConfig()); 50 | 51 | // declare that this plugin supports both half and float formats 52 | cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 53 | //cfg->supportDataType_ = {nvinfer1::DataType::kHALF}; 54 | return cfg; 55 | } 56 | 57 | int MReLU::enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) { 58 | 59 | int count = inputs[0].count(); 60 | auto grid = gridDims(count); 61 | auto block = blockDims(count); 62 | float bias = *this->config_->weights_[0]->cpu<float>(); 63 | 64 | if (config_->configDataType_ == TRTInfer::DataType::dtFloat) { 65 | MReLUKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), bias, count); 66 | } 67 | else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) { 68 | MReLUKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), halfloat(bias), count); 69 | } 70 | return 0; 71 | } 72 | 73 | RegisterPlugin(MReLU); -------------------------------------------------------------------------------- /src/onnxplugin/plugins/MReLU.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MReLU_HPP 3 | #define MReLU_HPP 4 | 5 | #include 6 | 7 | using namespace ONNXPlugin; 8 | 9 | class MReLUConfig : public LayerConfig{ 10 | public: 11 | virtual void init() override; 12 | }; 13 | 14 | class MReLU : public TRTPlugin { 15 | public: 16 | SetupPlugin(MReLU); 17 | 18 | virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override; 19 | 20 | // this plugin has only one output and its shape equals the shape of input 0, so return input0's shape 21 | virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override; 22 | 23 | // forward execution 24 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override; 25 | }; 26 | 27 | #endif //MReLU_HPP -------------------------------------------------------------------------------- /workspace/imgs/000020.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/000020.jpg -------------------------------------------------------------------------------- /workspace/imgs/000023.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/000023.jpg -------------------------------------------------------------------------------- /workspace/imgs/17790319373_bd19b24cfc_k.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/17790319373_bd19b24cfc_k.jpg -------------------------------------------------------------------------------- /workspace/imgs/selfie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/selfie.jpg -------------------------------------------------------------------------------- /workspace/imgs/www.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/www.jpg -------------------------------------------------------------------------------- /workspace/logs/2020-04-15.log: -------------------------------------------------------------------------------- 1 | W[2020-04-15 21:47:53:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:19]:onnx to trtmodel... 2 | W[2020-04-15 21:47:53:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:96]:Build FP32 trtmodel. 3 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:33]:init MReLU config: {"kernel_size": 3, "eps": 0.03, "other": "Hello Onnx Plugin"} 4 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:34]:MReLU weights = 1[1 x 1 x 1 x 1] 5 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:38]:MReLU kernel_size: 3 6 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:39]:MReLU eps: 0.03 7 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:40]:MReLU other: Hello Onnx Plugin 8 | I[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:200]:input shape: 3 x 5 x 5 9 | W[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:201]:Set max batch size: 4 10 | W[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:206]:Build engine 11 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:26]:done. 12 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:28]:load model: models/demo.fp32.trtmodel 13 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:33]:init MReLU config: {"kernel_size": 3, "eps": 0.03, "other": "Hello Onnx Plugin"} 14 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:34]:MReLU weights = 1[1 x 1 x 1 x 1] 15 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:38]:MReLU kernel_size: 3 16 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:39]:MReLU eps: 0.03 17 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:40]:MReLU other: Hello Onnx Plugin 18 | W[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:35]:NVInfer WARNING: Current optimization profile is: 0. Please ensure there are no enqueued operations pending in this context prior to switching profiles 19 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:35]:forward... 20 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:42]:done. 
21 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\dbface.cpp:140]:models/dbface.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dbface.onnx 22 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_net_coco2x_dcn.cpp:121]:onnx to trtmodel... 23 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_net_coco2x_dcn.cpp:132]:models/dladcnv2.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dladcnv2.onnx or use centerNetDLADCNOnnX/dladcn_export_onnx.py to generate 24 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_track_coco_tracking.cpp:131]:onnx to trtmodel... 25 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_track_coco_tracking.cpp:139]:models/coco_tracking.onnx not found, download url: http://zifuture.com:1000/fs/public_models/coco_tracking.onnx 26 | -------------------------------------------------------------------------------- /workspace/models/.gitignore: -------------------------------------------------------------------------------- 1 | dladcnv2.onnx 2 | dbface.onnx 3 | coco_tracking.onnx 4 | *.trtmodel -------------------------------------------------------------------------------- /workspace/models/demo.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/models/demo.onnx -------------------------------------------------------------------------------- /workspace/results/0.centernet.coco2x.dcn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/0.centernet.coco2x.dcn.jpg -------------------------------------------------------------------------------- /workspace/results/1.centernet.coco2x.dcn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/1.centernet.coco2x.dcn.jpg -------------------------------------------------------------------------------- /workspace/results/coco.tracking.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/coco.tracking.jpg -------------------------------------------------------------------------------- /workspace/results/selfie.draw.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/selfie.draw.jpg --------------------------------------------------------------------------------
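// ---- Editor's illustrative sketch (not part of the original repository) ----------------
// CPU reference implementations of the three ONNX-plugin activations listed above
// (HSigmoid, HSwish, MReLU), handy for spot-checking TensorRT outputs such as the runs
// recorded in workspace/logs. The function names are assumptions made only for this
// sketch; the formulas mirror the float CUDA kernels in src/onnxplugin/plugins.
#include <algorithm>
inline float hsigmoid_ref(float x) {
    return std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;   // clamp(x + 3, 0, 6) / 6
}
inline float hswish_ref(float x) {
    return x * hsigmoid_ref(x);                               // x * clamp(x + 3, 0, 6) / 6
}
inline float mrelu_ref(float x, float bias) {
    return (x > 0.0f ? x : 0.0f) + bias;                      // ReLU(x) + per-layer bias
}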