├── .gitignore
├── Makefile
├── README.cn.md
├── README.md
├── README.onnx.plugin.md
├── README.onnx.plugin.pdf
├── TensorRT.sln
├── TensorRT.vcxproj
├── TensorRT.vcxproj.filters
├── TensorRT.vcxproj.user
├── dbface
    └── DBFace.py
├── dcn_onnx
    ├── DCNv2
    │   ├── .gitignore
    │   ├── LICENSE
    │   ├── README.md
    │   ├── __init__.py
    │   ├── dcn_v2.py
    │   ├── make.sh
    │   ├── setup.py
    │   ├── src
    │   │   ├── cpu
    │   │   │   ├── dcn_v2_cpu.cpp
    │   │   │   └── vision.h
    │   │   ├── cuda
    │   │   │   ├── dcn_v2_cuda.cu
    │   │   │   ├── dcn_v2_im2col_cuda.cu
    │   │   │   ├── dcn_v2_im2col_cuda.h
    │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
    │   │   │   └── vision.h
    │   │   ├── dcn_v2.h
    │   │   └── vision.cpp
    │   └── test.py
    ├── README.md
    ├── dcn_v2.py
    ├── dladcn_export_onnx.py
    └── pose_dla_dcn.py
├── lean
    ├── .gitignore
    └── README.md
├── plugin_onnx_export.py
├── scripts
    ├── getALL.sh
    ├── getCenterTrack.sh
    ├── getDBFace.sh
    └── getDLADCN.sh
├── src
    ├── builder
    │   ├── trt_builder.cpp
    │   └── trt_builder.hpp
    ├── caffeplugin
    │   ├── caffeplugin.cpp
    │   ├── caffeplugin.hpp
    │   └── plugins
    │   │   ├── ChannelMultiplicationLayer.cu
    │   │   ├── ChannelMultiplicationLayer.hpp
    │   │   ├── ClipLayer.cu
    │   │   ├── ClipLayer.hpp
    │   │   ├── DCNLayer.cu
    │   │   ├── DCNLayer.hpp
    │   │   ├── PlexShuffleLayer.cu
    │   │   ├── PlexShuffleLayer.hpp
    │   │   ├── TestPlugin.cu
    │   │   └── TestPlugin.hpp
    ├── common
    │   ├── cc_util.cpp
    │   ├── cc_util.hpp
    │   ├── json.cpp
    │   ├── json.hpp
    │   ├── trt_common.cpp
    │   └── trt_common.hpp
    ├── examples
    │   ├── center_net_coco2x_dcn.cpp
    │   ├── center_track_coco_tracking.cpp
    │   ├── dbface.cpp
    │   └── onnx.cpp
    ├── import_lib.cpp
    ├── infer
    │   ├── ct_detect_backend.cu
    │   ├── ct_detect_backend.hpp
    │   ├── dbface_backend.cu
    │   ├── dbface_backend.hpp
    │   ├── task_pool.hpp
    │   ├── trt_backend.cpp
    │   ├── trt_backend.hpp
    │   ├── trt_infer.cpp
    │   ├── trt_infer.hpp
    │   └── trt_infer_norm.cu
    ├── main.cpp
    ├── onnx
    │   ├── onnx-operators_ONNX_NAMESPACE-ml.pb.cpp
    │   ├── onnx-operators_ONNX_NAMESPACE-ml.pb.h
    │   ├── onnx_ONNX_NAMESPACE-ml.pb.cpp
    │   ├── onnx_ONNX_NAMESPACE-ml.pb.h
    │   ├── onnx_pb.h
    │   └── onnxifi.h
    ├── onnx_parser
    │   ├── ImporterContext.hpp
    │   ├── InstanceNormalization.cpp
    │   ├── InstanceNormalization.hpp
    │   ├── ModelImporter.cpp
    │   ├── ModelImporter.hpp
    │   ├── NvOnnxParser.cpp
    │   ├── NvOnnxParser.h
    │   ├── NvOnnxParserTypedefs.h
    │   ├── OnnxAttrs.cpp
    │   ├── OnnxAttrs.hpp
    │   ├── ResizeNearest.cu
    │   ├── ResizeNearest.hpp
    │   ├── ShapedWeights.cpp
    │   ├── ShapedWeights.hpp
    │   ├── Split.cu
    │   ├── Split.hpp
    │   ├── Status.hpp
    │   ├── TensorOrWeights.hpp
    │   ├── builtin_op_importers.cpp
    │   ├── builtin_op_importers.hpp
    │   ├── common.hpp
    │   ├── onnx2trt.hpp
    │   ├── onnx2trt_common.hpp
    │   ├── onnx2trt_utils.cpp
    │   ├── onnx2trt_utils.hpp
    │   ├── onnx_utils.hpp
    │   ├── plugin.cpp
    │   ├── plugin.hpp
    │   ├── plugin_common.hpp
    │   ├── serialize.hpp
    │   ├── toposort.hpp
    │   ├── trt_utils.hpp
    │   └── utils.hpp
    └── onnxplugin
    │   ├── onnxplugin.cpp
    │   ├── onnxplugin.hpp
    │   └── plugins
    │       ├── DCNv2.cu
    │       ├── DCNv2.hpp
    │       ├── HSigmoid.cu
    │       ├── HSigmoid.hpp
    │       ├── HSwish.cu
    │       ├── HSwish.hpp
    │       ├── MReLU.cu
    │       └── MReLU.hpp
└── workspace
    ├── imgs
        ├── 000020.jpg
        ├── 000023.jpg
        ├── 17790319373_bd19b24cfc_k.jpg
        ├── selfie.jpg
        └── www.jpg
    ├── logs
        ├── 2020-04-15.log
        └── 2020-04-17.log
    ├── models
        ├── .gitignore
        └── demo.onnx
    └── results
        ├── 0.centernet.coco2x.dcn.jpg
        ├── 1.centernet.coco2x.dcn.jpg
        ├── coco.tracking.jpg
        └── selfie.draw.jpg


/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated TensorRT engines, build output and Visual Studio artifacts
2 | **/*.trtmodel
3 | /build
4 | .vs
5 | **/*.pdb
6 | **/*.exe
7 | **/*.ilk
8 | **/*.suo
9 | objs


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Build script for the trtrun demo: compiles every .cpp/.cu file under
 2 | # $(SRCDIR) into $(OBJDIR) and links them into $(BINDIR)/$(OUTNAME).
 3 | ECHO = @echo
 4 | OUTNAME = trtrun
 5 | CC := g++
 6 | CUCC := nvcc
 7 | SRCDIR := src
 8 | OBJDIR := objs
 9 | # Root directory of the CUDA, cuDNN, TensorRT and OpenCV packages referenced below.
10 | LEAN := /datav/newbb/lean
11 | #BINDIR := $(LEAN)/tensorRTIntegrate
12 | BINDIR := workspace
13 | 
14 | TENSORRT_NAME := TensorRT-7.0.0.11
15 | #TENSORRT_NAME := TensorRT-6.0.1.8-cuda10.2-cudnn7.6
16 | CFLAGS := -std=c++11 -fPIC -m64 -g -O3 -fopenmp -w -DONNX_ML -DNDEBUG
17 | CUFLAGS := -std=c++11 -m64 -Xcompiler -fPIC -g -O3 -w -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_61,code=sm_61
18 | INC_OPENCV := $(LEAN)/opencv4.2.0/include/opencv4
19 | # NOTE(review): ./src/plugin and ./src/plugin/plugins are not in the source tree
20 | # (plugin code lives in src/caffeplugin and src/onnxplugin) - confirm these entries are still needed.
21 | INC_LOCAL := ./src ./src/builder ./src/common ./src/infer ./src/plugin ./src/plugin/plugins
22 | INC_SYS := /usr/local/protobuf/include
23 | INC_CUDA := $(LEAN)/cuda10.2/include $(LEAN)/$(TENSORRT_NAME)/include $(LEAN)/cudnn7.6.5.32-cuda10.2
24 | INCS := $(INC_SYS) $(INC_OPENCV) $(INC_LOCAL) $(INC_CUDA)
25 | INCS := $(foreach inc, $(INCS), -I$(inc))
26 | 
27 | LIB_CUDA := $(LEAN)/cuda10.2/lib $(LEAN)/$(TENSORRT_NAME)/lib $(LEAN)/cudnn7.6.5.32-cuda10.2
28 | LIB_SYS := /usr/local/protobuf/lib
29 | LIB_OPENCV := $(LEAN)/opencv4.2.0/lib
30 | LIBS := $(LIB_SYS) $(LIB_CUDA) $(LIB_OPENCV)
31 | LIBS := $(foreach lib, $(LIBS),-L$(lib))
32 | 
33 | # Pass every library directory to the linker as a run-time search path as well.
34 | RPATH := $(LIB_SYS) $(LIB_CUDA) $(LIB_OPENCV)
35 | RPATH := $(foreach lib, $(RPATH),-Wl,-rpath=$(lib))
36 | 
37 | LD_OPENCV := opencv_core opencv_highgui opencv_imgproc opencv_video opencv_videoio opencv_imgcodecs
38 | LD_NVINFER := nvinfer nvinfer_plugin nvparsers
39 | LD_CUDA := cuda curand cublas cudart cudnn
40 | LD_SYS := stdc++
41 | LDS := $(LD_SYS) $(LD_OPENCV) $(LD_NVINFER) $(LD_CUDA)
42 | LDS := $(foreach lib, $(LDS), -l$(lib))
43 | 
44 | # Source lists are relative to $(SRCDIR). The explicit "." start path keeps
45 | # the command valid for find implementations that require one.
46 | SRCS := $(shell cd $(SRCDIR) && find . -name "*.cpp")
47 | OBJS := $(patsubst %.cpp,%.o,$(SRCS))
48 | OBJS := $(foreach item,$(OBJS),$(OBJDIR)/$(item))
49 | CUS := $(shell cd $(SRCDIR) && find . -name "*.cu")
50 | CUOBJS := $(patsubst %.cu,%.o,$(CUS))
51 | CUOBJS := $(foreach item,$(CUOBJS),$(OBJDIR)/$(item))
52 | # find prints "./a/b.cpp"; drop the "/./" left over after prefixing $(OBJDIR).
53 | OBJS := $(subst /./,/,$(OBJS))
54 | CUOBJS := $(subst /./,/,$(CUOBJS))
55 | 
56 | # These targets name commands, not files, so always run their recipes.
57 | .PHONY: all run clean
58 | 
59 | all: $(BINDIR)/$(OUTNAME)
60 | 	$(ECHO) Done, now you can run this program with \"make run\" command.
61 | 
62 | run: all
63 | 	@cd $(BINDIR) && ./$(OUTNAME)
64 | 
65 | $(BINDIR)/$(OUTNAME): $(OBJS) $(CUOBJS)
66 | 	@mkdir -p $(dir $@)
67 | 	$(ECHO) Linking: $@
68 | 	@$(CC) $(LIBS) -o $@ $^ $(LDS) $(RPATH) -pthread -lprotobuf
69 | 
70 | $(CUOBJS) : $(OBJDIR)/%.o : $(SRCDIR)/%.cu
71 | 	@mkdir -p $(dir $@)
72 | 	$(ECHO) Compiling: $<
73 | 	@$(CUCC) $(CUFLAGS) $(INCS) -c -o $@ $<
74 | 
75 | $(OBJS) : $(OBJDIR)/%.o : $(SRCDIR)/%.cpp
76 | 	@mkdir -p $(dir $@)
77 | 	$(ECHO) Compiling: $<
78 | 	@$(CC) $(CFLAGS) $(INCS) -c -o $@ $<
79 | 
80 | clean:
81 | 	rm -rf $(OBJDIR) $(BINDIR)/$(OUTNAME)
82 | 


--------------------------------------------------------------------------------
/README.cn.md:
--------------------------------------------------------------------------------
 1 | # TensorRT
 2 | 
 3 | ---
 4 | * 1、支持ONNX的插件开发，并且实现[CenterNet](https://github.com/xingyizhou/CenterNet)的DCNv2插件demo（fp32/fp16）和Inference实现，附有案例
 5 | * 2、不建议使用pytorch->caffemodel->tensorRT，改用pytorch->onnx->tensorRT，对于任何特定需求（例如dcn、例如双线性插值），可以用插件实现
 6 | * 3、如果不用这里提供的框架，自己实现onnx插件，这里有[一份指导](README.onnx.plugin.md)，说明了关键点，可以做参考
 7 | * 4、视频讲解点击这里：https://www.bilibili.com/video/BV1Pe411x7qr
 8 | 
 9 | 
10 | ## 复现centerNetDCN的检测结果
11 | ![image1](workspace/results/1.centernet.coco2x.dcn.jpg)
12 | 
13 | <br/>
14 | 
15 | ## 复现centerTrack的结果
16 | 
17 | 
18 | 
19 | ![image1](workspace/results/coco.tracking.jpg)
20 | 
21 | <br/>
22 | 
23 | ## 复现DBFace
24 | 
25 | ![dbface](workspace/results/selfie.draw.jpg)
26 | 
27 | 
28 | 
29 | 
30 | ## 快速使用
31 | * 安装protobuf v3.8.x，点击[README.onnx.plugin.md](README.onnx.plugin.md)有提到怎么装
32 | ```bash
33 | bash scripts/getDLADCN.sh
34 | make run -j32
35 | ```
36 | 
37 | ---
38 | ## 案例-Inference
39 | ```
40 | auto engine = TRTInfer::loadEngine("models/efficientnet-b0.fp32.trtmodel");
41 | float mean[3] = {0.485, 0.456, 0.406};
42 | float std[3] = {0.229, 0.224, 0.225};
43 | Mat image = imread("img.jpg");
44 | engine->input()->setNormMat(0, image, mean, std);
45 | engine->forward();
46 | engine->output(0)->print();
47 | ```
48 | 
49 | ## 环境-Windows
50 | * tensorRT7.0.0.11 (如果修改为6或者其他版本，可能会面临一点改动)
51 | * opencv3.4.6（可以任意修改为其他版本）
52 | * cudnn7.6.3（可以任意修改为其他版本）
53 | * cuda10.0（可以任意修改为其他版本）
54 | * protobuf v3.8.x
55 | * Visual Studio 2017（可以用其他版本打开，但需要修改对应opencv版本）
56 | * <font color=red>如果要修改版本，你需要下载cuda/cudnn/tensorRT三者同时匹配的版本，因为他们互相存在依赖，否则只要你是cuda10.0就可以很轻易编译这个项目</font>
57 | * Windows下的[依赖库lean.zip下载](http://zifuture.com:1000/fs/25.shared/lean.zip)
58 | ---
59 | 
60 | 
61 | ## 环境-Linux
62 | * protobuf v3.8.x
63 | * cuda10.2 （可以任意修改为其他版本）
64 | * cudnn7.6.5.32-cuda10.2 （可以任意修改为其他版本）
65 | * opencv4.2.0 （可以任意修改为其他版本）
66 | * TensorRT-7.0.0.11 (如果修改为6或者其他版本，可能会面临一点改动)
67 | ---
68 | 
69 | 
70 | ## 说明
71 | * pytorch到onnx（autograd.Function类特殊自定义实现函数导出成插件），参考[plugin_onnx_export.py](plugin_onnx_export.py)
72 | * onnx插件MReLU参考[MReLU.cu](src/onnxplugin/plugins/MReLU.cu)，和HSwish参考[HSwish.cu](src/onnxplugin/plugins/HSwish.cu)
73 | * src/caffeplugin底下的插件是实现caffemodel的插件方法，与onnx不兼容，并且不被推荐使用
74 | * int8已经失效，如果需要int8，可以使用[之前的版本并替换为tensorRT6.0](https://github.com/dlunion/tensorRTIntegrate/tree/59e933efc8011bc304d3ccd9fdd1d6cbc7b2e9a0)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 这个版本已经废弃，最新版本，请移步
 2 | - https://github.com/shouxieai/tensorRT_cpp
 3 | - 新版本支持了最新版的tensorRT、yolov5，替换了新的解析器，模型编译报错更少
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | # YoloV5 Support
12 | http://zifuture.com:1556/fs/16.std/release_tensorRT_yolov5.zip
13 | 
14 | 
15 | # TensorRT-Integrate
16 | 
17 | 1. Supports PyTorch ONNX plugins (DCN, HSwish, etc.)
18 | 2. Simpler inference and plugin APIs
19 | 
20 | <br/>
21 | 
22 | 
23 | ## Re-implement
24 | ##### [CenterNet : ctdet_coco_dla_2x](https://github.com/xingyizhou/CenterNet)
25 | 
26 | ![image1](workspace/results/1.centernet.coco2x.dcn.jpg)
27 | 
28 | <br/>
29 | 
30 | ##### [CenterTrack: coco_tracking](https://github.com/xingyizhou/CenterTrack)
31 | 
32 | ![coco.tracking.jpg](workspace/results/coco.tracking.jpg)
33 | 
34 | * [coco_tracking.onnx download](http://zifuture.com:1556/fs/public_models/coco_tracking.onnx)
35 | 
36 | * [nuScenes_3Dtracking.onnx download](http://zifuture.com:1556/fs/public_models/nuScenes_3Dtracking.onnx)
37 | 
38 | <br/>
39 | 
40 | ##### [DBFace](https://github.com/dlunion/DBFace)
41 | 
42 | ![selfie.draw.jpg](workspace/results/selfie.draw.jpg)
43 | 
44 | 
45 | 
46 | ## Use TensorRT-Integrate
47 | 
48 | Install protobuf == 3.11.4 (or >= 3.8.x, but that is more troublesome)
49 | 
50 | ```bash
51 | bash scripts/getALL.sh
52 | make run -j32
53 | ```
54 | 
55 | <br/>
56 | 
57 | ## Inference Code
58 | 
59 | ```
60 | auto engine = TRTInfer::loadEngine("models/efficientnet-b0.fp32.trtmodel");
61 | float mean[3] = {0.485, 0.456, 0.406};
62 | float std[3] = {0.229, 0.224, 0.225};
63 | Mat image = imread("img.jpg");
64 | auto input = engine->input();
65 | 
66 | // multi batch sample
67 | input->resize(2);
68 | input->setNormMatGPU(0, image, mean, std);
69 | input->setNormMatGPU(1, image, mean, std);
70 | 
71 | engine->forward();
72 | 
73 | // get result and copy to cpu
74 | engine->output(0)->cpu<float>();
75 | engine->tensor("hm")->cpu<float>();
76 | ```
77 | 
78 | <br/>
79 | 
80 | ## Environment
81 | 
82 | * tensorRT7.0 or tensorRT6.0
83 | * opencv3.4.6
84 | * cudnn7.6.3
85 | * cuda10.0
86 | * protobuf v3.8.x
87 | * Visual Studio 2017
88 | * [lean-windows.zip (include tensorRT、opencv、cudnn、cuda、protobuf)](http://zifuture.com:1556/fs/25.shared/lean.zip)
89 | 
90 | <br/>
91 | 
92 | ## Plugin
93 | 
94 | 1. Pytorch export ONNX:  [plugin_onnx_export.py](plugin_onnx_export.py)
95 | 2. [MReLU.cu](src/onnxplugin/plugins/MReLU.cu) 、[HSwish.cu](src/onnxplugin/plugins/HSwish.cu)、[DCNv2.cu](src/onnxplugin/plugins/DCNv2.cu)
96 | 
97 | <br/>
98 | 


--------------------------------------------------------------------------------
/README.onnx.plugin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/README.onnx.plugin.pdf


--------------------------------------------------------------------------------
/TensorRT.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 15
 4 | VisualStudioVersion = 15.0.28307.136
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TensorRT", "TensorRT.vcxproj", "{FBF775F5-DAB4-4BC1-97A9-D36301073438}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|x64 = Debug|x64
11 | 		Release|x64 = Release|x64
12 | 	EndGlobalSection
13 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | 		{FBF775F5-DAB4-4BC1-97A9-D36301073438}.Debug|x64.ActiveCfg = Debug|x64
15 | 		{FBF775F5-DAB4-4BC1-97A9-D36301073438}.Debug|x64.Build.0 = Debug|x64
16 | 		{FBF775F5-DAB4-4BC1-97A9-D36301073438}.Release|x64.ActiveCfg = Release|x64
17 | 		{FBF775F5-DAB4-4BC1-97A9-D36301073438}.Release|x64.Build.0 = Release|x64
18 | 	EndGlobalSection
19 | 	GlobalSection(SolutionProperties) = preSolution
20 | 		HideSolutionNode = FALSE
21 | 	EndGlobalSection
22 | 	GlobalSection(ExtensibilityGlobals) = postSolution
23 | 		SolutionGuid = {679F35F0-20AA-4D18-8610-D369E2BE97E8}
24 | 	EndGlobalSection
25 | EndGlobal
26 | 


--------------------------------------------------------------------------------
/TensorRT.vcxproj.user:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 4 |     <LocalDebuggerWorkingDirectory>workspace</LocalDebuggerWorkingDirectory>
 5 |     <LocalDebuggerEnvironment>PATH=$(projectDir)lean/cuda10.0/bin;$(projectDir)lean/opencv3.4.6/lib;$(projectDir)lean/cudnn7.6.3;$(projectDir)lean/TensorRT-7.0.0.11/lib</LocalDebuggerEnvironment>
 6 |     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
 7 |     <LocalDebuggerMergeEnvironment>false</LocalDebuggerMergeEnvironment>
 8 |   </PropertyGroup>
 9 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> 
10 |     <LocalDebuggerWorkingDirectory>workspace</LocalDebuggerWorkingDirectory>
11 |     <LocalDebuggerEnvironment>PATH=$(projectDir)lean/cuda10.0/bin;$(projectDir)lean/opencv3.4.6/lib;$(projectDir)lean/cudnn7.6.3;$(projectDir)lean/TensorRT-7.0.0.11/lib</LocalDebuggerEnvironment>
12 |     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
13 |     <LocalDebuggerMergeEnvironment>false</LocalDebuggerMergeEnvironment>
14 |   </PropertyGroup>
15 | </Project>


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .idea
3 | *.so
4 | *.o
5 | *pyc
6 | _ext
7 | build
8 | DCNv2.egg-info
9 | dist


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2019, Charles Shang
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/README.md:
--------------------------------------------------------------------------------
 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.0
 2 | 
 3 | ### Build
 4 | ```bash
 5 |     ./make.sh         # build
 6 |     python test.py    # run examples and gradient check 
 7 | ```
 8 | 
 9 | ### An Example
10 | - deformable conv
11 | ```python
12 |     from dcn_v2 import DCN
13 |     input = torch.randn(2, 64, 128, 128).cuda()
14 |     # wrap all things (offset and mask) in DCN
15 |     dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 |     output = dcn(input)
17 |     print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 |     from dcn_v2 import DCNPooling
22 |     input = torch.randn(2, 32, 64, 64).cuda()
23 |     batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 |     x = torch.randint(256, (20, 1)).cuda().float()
25 |     y = torch.randint(256, (20, 1)).cuda().float()
26 |     w = torch.randint(64, (20, 1)).cuda().float()
27 |     h = torch.randint(64, (20, 1)).cuda().float()
28 |     rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 | 
30 |     # modulated deformable pooling (V2)
31 |     # wrap all things (offset and mask) in DCNPooling
32 |     dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 |                          pooled_size=7,
34 |                          output_dim=32,
35 |                          no_trans=False,
36 |                          group_size=1,
37 |                          trans_std=0.1).cuda()
38 | 
39 |     dout = dpooling(input, rois)
40 | ```
41 | ### Note
42 | The master branch now targets PyTorch 1.0 (new ATen API); you can switch back to PyTorch 0.4 with:
43 | ```bash
44 | git checkout pytorch_0.4
45 | ```
46 | 
47 | ### Known Issues:
48 | 
49 | - [x] Gradient check w.r.t offset (solved)
50 | - [ ] Backward is not reentrant (minor)
51 | 
52 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
53 | 
54 | <s>I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some
56 | non-differentiable points? </s>
57 | 
58 | Update: all gradient checks pass with double precision. 
59 | 
60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 
61 | float, `<1e-15` for double), 
62 | so it may not be a serious problem (?)
63 | 
64 | Please post an issue or PR if you have any comments.
65 |     


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/dcn_onnx/DCNv2/__init__.py


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python setup.py build develop  # invoke setup.py with the "build" and then the "develop" command
3 | 


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os
 4 | import glob
 5 | 
 6 | import torch
 7 | 
 8 | from torch.utils.cpp_extension import CUDA_HOME
 9 | from torch.utils.cpp_extension import CppExtension
10 | from torch.utils.cpp_extension import CUDAExtension
11 | 
12 | from setuptools import find_packages
13 | from setuptools import setup
14 | 
15 | requirements = ["torch", "torchvision"]
16 | 
17 | def get_extensions():
18 |     this_dir = os.path.dirname(os.path.abspath(__file__))
19 |     extensions_dir = os.path.join(this_dir, "src")
20 | 
21 |     main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
22 |     source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
23 |     source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
24 | 
25 |     sources = main_file + source_cpu
26 |     extension = CppExtension
27 |     extra_compile_args = {"cxx": []}
28 |     define_macros = []
29 | 
30 |     if torch.cuda.is_available() and CUDA_HOME is not None:
31 |         extension = CUDAExtension
32 |         sources += source_cuda
33 |         define_macros += [("WITH_CUDA", None)]
34 |         extra_compile_args["nvcc"] = [
35 |             "-DCUDA_HAS_FP16=1",
36 |             "-D__CUDA_NO_HALF_OPERATORS__",
37 |             "-D__CUDA_NO_HALF_CONVERSIONS__",
38 |             "-D__CUDA_NO_HALF2_OPERATORS__",
39 |         ]
40 |     else:
41 |         raise NotImplementedError('Cuda is not availabel')
42 | 
43 |     sources = [os.path.join(extensions_dir, s) for s in sources]
44 |     include_dirs = [extensions_dir, "/usr/local/cuda-10.1/include"]
45 |     ext_modules = [
46 |         extension(
47 |             "_ext",
48 |             sources,
49 |             include_dirs=include_dirs,
50 |             define_macros=define_macros,
51 |             extra_compile_args=extra_compile_args,
52 |         )
53 |     ]
54 |     return ext_modules
55 | 
56 | setup(
57 |     name="DCNv2",
58 |     version="0.1",
59 |     author="charlesshang",
60 |     url="https://github.com/charlesshang/DCNv2",
61 |     description="deformable convolutional networks",
62 |     packages=find_packages(exclude=("configs", "tests",)),
63 |     # install_requires=requirements,
64 |     ext_modules=get_extensions(),  # evaluated when this script runs; raises NotImplementedError without CUDA
65 |     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},  # build_ext is handled by torch's BuildExtension
66 | )


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/cpu/dcn_v2_cpu.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | 
 3 | #include <ATen/ATen.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | 
 6 | 
 7 | at::Tensor // CPU entry point for the DCNv2 forward pass (stub: no argument is read)
 8 | dcn_v2_cpu_forward(const at::Tensor &input,
 9 |                    const at::Tensor &weight,
10 |                    const at::Tensor &bias,
11 |                    const at::Tensor &offset,
12 |                    const at::Tensor &mask,
13 |                    const int kernel_h,
14 |                    const int kernel_w,
15 |                    const int stride_h,
16 |                    const int stride_w,
17 |                    const int pad_h,
18 |                    const int pad_w,
19 |                    const int dilation_h,
20 |                    const int dilation_w,
21 |                    const int deformable_group)
22 | {
23 |     AT_ERROR("Not implement on cpu"); // the whole body: no CPU computation, only the AT_ERROR report
24 | }
25 | 
26 | std::vector<at::Tensor> // CPU entry point for the DCNv2 backward pass (stub: no argument is read)
27 | dcn_v2_cpu_backward(const at::Tensor &input,
28 |                     const at::Tensor &weight,
29 |                     const at::Tensor &bias,
30 |                     const at::Tensor &offset,
31 |                     const at::Tensor &mask,
32 |                     const at::Tensor &grad_output,
33 |                     int kernel_h, int kernel_w,
34 |                     int stride_h, int stride_w,
35 |                     int pad_h, int pad_w,
36 |                     int dilation_h, int dilation_w,
37 |                     int deformable_group)
38 | {
39 |     AT_ERROR("Not implement on cpu"); // the whole body: no gradients are computed, only the AT_ERROR report
40 | }
41 | 
42 | std::tuple<at::Tensor, at::Tensor> // CPU entry point for PS-ROI pooling forward (stub: no argument is read)
43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
44 |                                  const at::Tensor &bbox,
45 |                                  const at::Tensor &trans,
46 |                                  const int no_trans,
47 |                                  const float spatial_scale,
48 |                                  const int output_dim,
49 |                                  const int group_size,
50 |                                  const int pooled_size,
51 |                                  const int part_size,
52 |                                  const int sample_per_part,
53 |                                  const float trans_std)
54 | {
55 |     AT_ERROR("Not implement on cpu"); // the whole body: no pooling is performed, only the AT_ERROR report
56 | }
57 | 
58 | std::tuple<at::Tensor, at::Tensor> // CPU entry point for PS-ROI pooling backward (stub: no argument is read)
59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
60 |                                   const at::Tensor &input,
61 |                                   const at::Tensor &bbox,
62 |                                   const at::Tensor &trans,
63 |                                   const at::Tensor &top_count,
64 |                                   const int no_trans,
65 |                                   const float spatial_scale,
66 |                                   const int output_dim,
67 |                                   const int group_size,
68 |                                   const int pooled_size,
69 |                                   const int part_size,
70 |                                   const int sample_per_part,
71 |                                   const float trans_std)
72 | {
73 |     AT_ERROR("Not implement on cpu"); // the whole body: no gradients are computed, only the AT_ERROR report
74 | }


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/extension.h>
 3 | 
 4 | at::Tensor // prototype only; the definition in cpu/dcn_v2_cpu.cpp consists of a single AT_ERROR call
 5 | dcn_v2_cpu_forward(const at::Tensor &input,
 6 |                     const at::Tensor &weight,
 7 |                     const at::Tensor &bias,
 8 |                     const at::Tensor &offset,
 9 |                     const at::Tensor &mask,
10 |                     const int kernel_h,
11 |                     const int kernel_w,
12 |                     const int stride_h,
13 |                     const int stride_w,
14 |                     const int pad_h,
15 |                     const int pad_w,
16 |                     const int dilation_h,
17 |                     const int dilation_w,
18 |                     const int deformable_group);
19 | 
20 | std::vector<at::Tensor> // prototype only; the definition in cpu/dcn_v2_cpu.cpp consists of a single AT_ERROR call
21 | dcn_v2_cpu_backward(const at::Tensor &input,
22 |                      const at::Tensor &weight,
23 |                      const at::Tensor &bias,
24 |                      const at::Tensor &offset,
25 |                      const at::Tensor &mask,
26 |                      const at::Tensor &grad_output,
27 |                      int kernel_h, int kernel_w,
28 |                      int stride_h, int stride_w,
29 |                      int pad_h, int pad_w,
30 |                      int dilation_h, int dilation_w,
31 |                      int deformable_group);
32 | 
33 | 
34 | std::tuple<at::Tensor, at::Tensor> // prototype only; the definition in cpu/dcn_v2_cpu.cpp consists of a single AT_ERROR call
35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
36 |                                   const at::Tensor &bbox,
37 |                                   const at::Tensor &trans,
38 |                                   const int no_trans,
39 |                                   const float spatial_scale,
40 |                                   const int output_dim,
41 |                                   const int group_size,
42 |                                   const int pooled_size,
43 |                                   const int part_size,
44 |                                   const int sample_per_part,
45 |                                   const float trans_std);
46 | 
47 | std::tuple<at::Tensor, at::Tensor> // prototype only; the definition in cpu/dcn_v2_cpu.cpp consists of a single AT_ERROR call
48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
49 |                                    const at::Tensor &input,
50 |                                    const at::Tensor &bbox,
51 |                                    const at::Tensor &trans,
52 |                                    const at::Tensor &top_count,
53 |                                    const int no_trans,
54 |                                    const float spatial_scale,
55 |                                    const int output_dim,
56 |                                    const int group_size,
57 |                                    const int pooled_size,
58 |                                    const int part_size,
59 |                                    const int sample_per_part,
60 |                                    const float trans_std);


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /*!
  3 |  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  4 |  *
  5 |  * COPYRIGHT
  6 |  *
  7 |  * All contributions by the University of California:
  8 |  * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
  9 |  * All rights reserved.
 10 |  *
 11 |  * All other contributions:
 12 |  * Copyright (c) 2014-2017, the respective contributors
 13 |  * All rights reserved.
 14 |  *
 15 |  * Caffe uses a shared copyright model: each contributor holds copyright over
 16 |  * their contributions to Caffe. The project versioning records all such
 17 |  * contribution and copyright details. If a contributor wants to further mark
 18 |  * their specific copyright on a particular contribution, they should indicate
 19 |  * their copyright solely in the commit message of the change when it is
 20 |  * committed.
 21 |  *
 22 |  * LICENSE
 23 |  *
 24 |  * Redistribution and use in source and binary forms, with or without
 25 |  * modification, are permitted provided that the following conditions are met:
 26 |  *
 27 |  * 1. Redistributions of source code must retain the above copyright notice, this
 28 |  * list of conditions and the following disclaimer.
 29 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 30 |  * this list of conditions and the following disclaimer in the documentation
 31 |  * and/or other materials provided with the distribution.
 32 |  *
 33 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 34 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 35 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 36 |  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 37 |  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 38 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 39 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 40 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 41 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 42 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 43 |  *
 44 |  * CONTRIBUTION AGREEMENT
 45 |  *
 46 |  * By contributing to the BVLC/caffe repository through pull-request, comment,
 47 |  * or otherwise, the contributor releases their content to the
 48 |  * license and copyright terms herein.
 49 |  *
 50 |  ***************** END Caffe Copyright Notice and Disclaimer ********************
 51 |  *
 52 |  * Copyright (c) 2018 Microsoft
 53 |  * Licensed under The MIT License [see LICENSE for details]
 54 |  * \file modulated_deformable_im2col.h
 55 |  * \brief Function definitions of converting an image to
 56 |  * column matrix based on kernel, padding, dilation, and offset.
 57 |  * These functions are mainly used in deformable convolution operators.
 58 |  * \ref: https://arxiv.org/abs/1811.11168
 59 |  * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
 60 |  */
 61 | 
 62 | /***************** Adapted by Charles Shang *********************/
 63 | 
 64 | #ifndef DCN_V2_IM2COL_CUDA
 65 | #define DCN_V2_IM2COL_CUDA
 66 | 
 67 | #ifdef __cplusplus
 68 | extern "C"
 69 | {
 70 | #endif
 71 | 
 72 |   void modulated_deformable_im2col_cuda(cudaStream_t stream,
 73 |                                         const float *data_im, const float *data_offset, const float *data_mask,
 74 |                                         const int batch_size, const int channels, const int height_im, const int width_im,
 75 |                                         const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
 76 |                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,
 77 |                                         const int dilation_h, const int dilation_w,
 78 |                                         const int deformable_group, float *data_col);
 79 | 
 80 |   void modulated_deformable_col2im_cuda(cudaStream_t stream,
 81 |                                         const float *data_col, const float *data_offset, const float *data_mask,
 82 |                                         const int batch_size, const int channels, const int height_im, const int width_im,
 83 |                                         const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
 84 |                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,
 85 |                                         const int dilation_h, const int dilation_w,
 86 |                                         const int deformable_group, float *grad_im);
 87 | 
 88 |   void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
 89 |                                          const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
 90 |                                          const int batch_size, const int channels, const int height_im, const int width_im,
 91 |                                          const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
 92 |                                          const int pad_h, const int pad_w, const int stride_h, const int stride_w,
 93 |                                          const int dilation_h, const int dilation_w,
 94 |                                          const int deformable_group,
 95 |                                          float *grad_offset, float *grad_mask);
 96 | 
 97 | #ifdef __cplusplus
 98 | }
 99 | #endif
100 | 
101 | #endif


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/cuda/vision.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/extension.h>
 3 | 
// CUDA entry points of the DCNv2 extension. This header only declares them;
// the definitions live in other translation units (presumably the .cu files
// in this directory -- not visible here).

// Forward pass of the DCNv2 operator. Takes the input, weight, bias, offset
// and mask tensors plus the convolution geometry and returns one tensor.
at::Tensor
dcn_v2_cuda_forward(const at::Tensor &input,
                    const at::Tensor &weight,
                    const at::Tensor &bias,
                    const at::Tensor &offset,
                    const at::Tensor &mask,
                    const int kernel_h,
                    const int kernel_w,
                    const int stride_h,
                    const int stride_w,
                    const int pad_h,
                    const int pad_w,
                    const int dilation_h,
                    const int dilation_w,
                    const int deformable_group);

// Backward pass of the DCNv2 operator. Same arguments as the forward pass
// plus `grad_output`; returns a vector of tensors.
// NOTE(review): presumably one gradient per differentiable input; the order
// is defined by the implementation -- confirm there.
std::vector<at::Tensor>
dcn_v2_cuda_backward(const at::Tensor &input,
                     const at::Tensor &weight,
                     const at::Tensor &bias,
                     const at::Tensor &offset,
                     const at::Tensor &mask,
                     const at::Tensor &grad_output,
                     int kernel_h, int kernel_w,
                     int stride_h, int stride_w,
                     int pad_h, int pad_w,
                     int dilation_h, int dilation_w,
                     int deformable_group);


// Forward pass of the PS-ROI pooling operator. Returns a pair of tensors.
// NOTE(review): the second tensor is presumably the `top_count` consumed by
// the backward declaration below -- confirm against the implementation.
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
                                  const at::Tensor &bbox,
                                  const at::Tensor &trans,
                                  const int no_trans,
                                  const float spatial_scale,
                                  const int output_dim,
                                  const int group_size,
                                  const int pooled_size,
                                  const int part_size,
                                  const int sample_per_part,
                                  const float trans_std);

// Backward pass of the PS-ROI pooling operator. Takes the output gradient,
// the forward inputs and `top_count`; returns a pair of tensors.
// NOTE(review): presumably the gradients for `input` and `trans` -- confirm.
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
                                   const at::Tensor &input,
                                   const at::Tensor &bbox,
                                   const at::Tensor &trans,
                                   const at::Tensor &top_count,
                                   const int no_trans,
                                   const float spatial_scale,
                                   const int output_dim,
                                   const int group_size,
                                   const int pooled_size,
                                   const int part_size,
                                   const int sample_per_part,
                                   const float trans_std);


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/dcn_v2.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "cpu/vision.h"
  4 | 
  5 | #ifdef WITH_CUDA
  6 | #include "cuda/vision.h"
  7 | #endif
  8 | 
  9 | at::Tensor
 10 | dcn_v2_forward(const at::Tensor &input,
 11 |                const at::Tensor &weight,
 12 |                const at::Tensor &bias,
 13 |                const at::Tensor &offset,
 14 |                const at::Tensor &mask,
 15 |                const int kernel_h,
 16 |                const int kernel_w,
 17 |                const int stride_h,
 18 |                const int stride_w,
 19 |                const int pad_h,
 20 |                const int pad_w,
 21 |                const int dilation_h,
 22 |                const int dilation_w,
 23 |                const int deformable_group)
 24 | {
 25 |     if (input.type().is_cuda())
 26 |     {
 27 | #ifdef WITH_CUDA
 28 |         return dcn_v2_cuda_forward(input, weight, bias, offset, mask,
 29 |                                    kernel_h, kernel_w,
 30 |                                    stride_h, stride_w,
 31 |                                    pad_h, pad_w,
 32 |                                    dilation_h, dilation_w,
 33 |                                    deformable_group);
 34 | #else
 35 |         AT_ERROR("Not compiled with GPU support");
 36 | #endif
 37 |     }
 38 |     AT_ERROR("Not implemented on the CPU");
 39 | }
 40 | 
 41 | std::vector<at::Tensor>
 42 | dcn_v2_backward(const at::Tensor &input,
 43 |                 const at::Tensor &weight,
 44 |                 const at::Tensor &bias,
 45 |                 const at::Tensor &offset,
 46 |                 const at::Tensor &mask,
 47 |                 const at::Tensor &grad_output,
 48 |                 int kernel_h, int kernel_w,
 49 |                 int stride_h, int stride_w,
 50 |                 int pad_h, int pad_w,
 51 |                 int dilation_h, int dilation_w,
 52 |                 int deformable_group)
 53 | {
 54 |     if (input.type().is_cuda())
 55 |     {
 56 | #ifdef WITH_CUDA
 57 |         return dcn_v2_cuda_backward(input,
 58 |                                     weight,
 59 |                                     bias,
 60 |                                     offset,
 61 |                                     mask,
 62 |                                     grad_output,
 63 |                                     kernel_h, kernel_w,
 64 |                                     stride_h, stride_w,
 65 |                                     pad_h, pad_w,
 66 |                                     dilation_h, dilation_w,
 67 |                                     deformable_group);
 68 | #else
 69 |         AT_ERROR("Not compiled with GPU support");
 70 | #endif
 71 |     }
 72 |     AT_ERROR("Not implemented on the CPU");
 73 | }
 74 | 
 75 | std::tuple<at::Tensor, at::Tensor>
 76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input,
 77 |                              const at::Tensor &bbox,
 78 |                              const at::Tensor &trans,
 79 |                              const int no_trans,
 80 |                              const float spatial_scale,
 81 |                              const int output_dim,
 82 |                              const int group_size,
 83 |                              const int pooled_size,
 84 |                              const int part_size,
 85 |                              const int sample_per_part,
 86 |                              const float trans_std)
 87 | {
 88 |     if (input.type().is_cuda())
 89 |     {
 90 | #ifdef WITH_CUDA
 91 |         return dcn_v2_psroi_pooling_cuda_forward(input,
 92 |                                                  bbox,
 93 |                                                  trans,
 94 |                                                  no_trans,
 95 |                                                  spatial_scale,
 96 |                                                  output_dim,
 97 |                                                  group_size,
 98 |                                                  pooled_size,
 99 |                                                  part_size,
100 |                                                  sample_per_part,
101 |                                                  trans_std);
102 | #else
103 |         AT_ERROR("Not compiled with GPU support");
104 | #endif
105 |     }
106 |     AT_ERROR("Not implemented on the CPU");
107 | }
108 | 
109 | std::tuple<at::Tensor, at::Tensor>
110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,
111 |                               const at::Tensor &input,
112 |                               const at::Tensor &bbox,
113 |                               const at::Tensor &trans,
114 |                               const at::Tensor &top_count,
115 |                               const int no_trans,
116 |                               const float spatial_scale,
117 |                               const int output_dim,
118 |                               const int group_size,
119 |                               const int pooled_size,
120 |                               const int part_size,
121 |                               const int sample_per_part,
122 |                               const float trans_std)
123 | {
124 |     if (input.type().is_cuda())
125 |     {
126 | #ifdef WITH_CUDA
127 |         return dcn_v2_psroi_pooling_cuda_backward(out_grad,
128 |                                                   input,
129 |                                                   bbox,
130 |                                                   trans,
131 |                                                   top_count,
132 |                                                   no_trans,
133 |                                                   spatial_scale,
134 |                                                   output_dim,
135 |                                                   group_size,
136 |                                                   pooled_size,
137 |                                                   part_size,
138 |                                                   sample_per_part,
139 |                                                   trans_std);
140 | #else
141 |         AT_ERROR("Not compiled with GPU support");
142 | #endif
143 |     }
144 |     AT_ERROR("Not implemented on the CPU");
145 | }


--------------------------------------------------------------------------------
/dcn_onnx/DCNv2/src/vision.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "dcn_v2.h"
 3 | 
// Python module definition for the extension. Each m.def call binds one of
// the dispatch functions from dcn_v2.h under the same Python name; the third
// argument is the Python-side docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
  m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
  m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
  m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
}
10 | 


--------------------------------------------------------------------------------
/dcn_onnx/README.md:
--------------------------------------------------------------------------------
1 | ## 说明
2 | 1. 下载官方的代码
3 | 2. 下载模型
4 | 3. 替换这里的几个py（请着重看一下这几个py里面，用==================修改的部分===================做的标记，没有修改太多，可以自己尝试改下）
5 | 4. 执行dladcn_export_onnx.py，导出onnx即可


--------------------------------------------------------------------------------
/dcn_onnx/dladcn_export_onnx.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # 请下载官方的代码，然后执行这个就可以生成了
 3 | import numpy as np
 4 | import torch
 5 | import torch.onnx.utils as onnx
 6 | import models.networks.pose_dla_dcn as net
 7 | from collections import OrderedDict
 8 | import cv2
 9 | 
# Build the DLA-34 pose network with the three detection heads
# (80-channel heatmap, 2-channel size, 2-channel offset regression).
model = net.get_pose_net(num_layers=34, heads={'hm': 80, 'wh': 2, 'reg': 2})

# Checkpoint downloaded from
# https://github.com/xingyizhou/CenterNet/blob/master/readme/MODEL_ZOO.md
# If that download fails, try the mirror:
# http://zifuture.com:1000/fs/public_models/ctdet_coco_dla_2x.pth
checkpoint = torch.load(r"ctdet_coco_dla_2x.pth", map_location="cpu")
checkpoint = checkpoint["state_dict"]

# Drop a leading "module." from parameter names so they match the bare model
# (such a prefix is typically left by a parallel wrapper at training time).
# Only a true prefix is removed; a "module." appearing later in a key is part
# of the real parameter path and must be kept.
_PREFIX = "module."
change = OrderedDict()
for key, op in checkpoint.items():
    stripped_key = key[len(_PREFIX):] if key.startswith(_PREFIX) else key
    change[stripped_key] = op

model.load_state_dict(change)
model.eval()
model.cuda()

# Dummy input used only for tracing (named to avoid shadowing builtin input()).
dummy_input = torch.zeros((1, 3, 32, 32)).cuda()

# A pre-exported model is available at:
# http://zifuture.com:1000/fs/public_models/dladcnv2.onnx
onnx.export(model, (dummy_input), "dladcnv2.onnx", output_names=["hm", "wh", "reg", "hm_pool"], verbose=True)
28 | 


--------------------------------------------------------------------------------
/lean/.gitignore:
--------------------------------------------------------------------------------
1 | /tensorRT6.0.1.5
2 | /cuda10.0
3 | /cudnn7.6.3
4 | /opencv3.4.6
5 | /TensorRT-7.0.0.11
6 | /protobuf3.11.4


--------------------------------------------------------------------------------
/lean/README.md:
--------------------------------------------------------------------------------
1 | # Lean
2 | 
3 | 编译好的protobuf Windows版本：http://zifuture.com:1000/fs/25.shared/lean.zip


--------------------------------------------------------------------------------
/plugin_onnx_export.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | import torch.nn as nn
 4 | import json
 5 |  
 6 | # 一个内置插件
 7 | class HSwishImplementation(torch.autograd.Function):
 8 | 
 9 |     @staticmethod
10 |     def symbolic(g, input, bias):
11 |         return g.op("HSwish", input, bias, info_s="string attribute", kernel_size_i=3, eps_f=3e-2)
12 | 
13 |     @staticmethod
14 |     def forward(ctx, i, bias):
15 |         ctx.save_for_backward(i)
16 |         return i * F.relu6(i + 3) / 6 + bias
17 | 
class MemoryEfficientHSwish(nn.Module):
    """Module wrapper around HSwishImplementation with a learnable bias.

    The bias parameter has the requested ``shape`` and starts at 3.15.
    """

    def __init__(self, shape):
        super().__init__()
        self.bias = nn.Parameter(torch.zeros(shape))
        # In-place initialisation must not be recorded by autograd.
        with torch.no_grad():
            self.bias.fill_(3.15)

    def forward(self, x):
        bias = self.bias
        return HSwishImplementation.apply(x, bias)
26 | 
27 | 
# A plugin implemented through this framework
class MReLUImplementation(torch.autograd.Function):
    """Autograd function exported to ONNX as a generic "Plugin" node.

    The node carries the plugin name ("MReLU") and a JSON-encoded settings
    string. Only ``symbolic`` and ``forward`` are defined.
    """

    @staticmethod
    def symbolic(g, input, bias):
        """Emit the "Plugin" node; settings travel as a JSON string attribute."""
        settings = {
            "kernel_size": 3,
            "eps": 3e-2,
            "other": "Hello Onnx Plugin"
        }
        encoded = json.dumps(settings)
        return g.op("Plugin", input, bias, name_s="MReLU", info_s=encoded)

    @staticmethod
    def forward(ctx, i, bias):
        """Return ``relu(i) + bias``; ``i`` is saved on ``ctx``."""
        ctx.save_for_backward(i)
        rectified = F.relu(i)
        return rectified + bias
43 | 
class MReLU(nn.Module):
    """Module wrapper around MReLUImplementation with a learnable bias.

    The positional arguments form the bias shape; the bias starts at 0.5.
    """

    def __init__(self, *shape):
        super().__init__()
        self.bias = nn.Parameter(torch.zeros(shape))
        # In-place initialisation must not be recorded by autograd.
        with torch.no_grad():
            self.bias.fill_(0.5)

    def forward(self, x):
        bias = self.bias
        return MReLUImplementation.apply(x, bias)
52 | 
class FooModel(torch.nn.Module):
    """Two-input demo model that exercises both custom plugin ops.

    ``forward`` returns ``mrelu(input2) + hswish(input1)``.
    """

    def __init__(self):
        super().__init__()
        self.hswish = MemoryEfficientHSwish(1)
        self.mrelu = MReLU(1)

    def forward(self, input1, input2):
        # Evaluate the MReLU branch first, matching the order of the sum.
        relu_branch = self.mrelu(input2)
        hswish_branch = self.hswish(input1)
        return relu_branch + hswish_branch
61 | 
import os

# Two constant dummy inputs, used both for the sanity run and for tracing.
dummy_input1 = torch.zeros((1, 3, 3, 3))
dummy_input2 = torch.zeros((1, 3, 3, 3))
model = FooModel()

dummy_input1[...] = 0.25
dummy_input2[...] = 0

# Sanity run: print the eager-mode output before exporting.
out = model(dummy_input1, dummy_input2)
print(out)

# Make sure the target directory exists; otherwise the export fails when it
# tries to open the output file.
onnx_path = 'workspace/models/demo.onnx'
os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
torch.onnx.export(model, (dummy_input1, dummy_input2), onnx_path, verbose=True)


--------------------------------------------------------------------------------
/scripts/getALL.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | bash scripts/getCenterTrack.sh
3 | bash scripts/getDBFace.sh
4 | bash scripts/getDLADCN.sh


--------------------------------------------------------------------------------
/scripts/getCenterTrack.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wget http://zifuture.com:1000/fs/public_models/coco_tracking.onnx -O workspace/models/coco_tracking.onnx


--------------------------------------------------------------------------------
/scripts/getDBFace.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wget http://zifuture.com:1000/fs/public_models/dbface.onnx -O workspace/models/dbface.onnx


--------------------------------------------------------------------------------
/scripts/getDLADCN.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wget http://zifuture.com:1000/fs/public_models/dladcnv2.onnx -O workspace/models/dladcnv2.onnx
3 | 


--------------------------------------------------------------------------------
/src/builder/trt_builder.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef TRT_BUILDER_HPP
 4 | #define TRT_BUILDER_HPP
 5 | 
 6 | #include <string>
 7 | #include <vector>
 8 | #include <functional>
 9 | #include <opencv2/opencv.hpp>
10 | 
// Public interface of the engine builder. Everything here is declaration
// only; the definitions live in another translation unit (not visible here).
namespace TRTBuilder {

	// Callback receiving an index (`current`), a total (`count`) and an image
	// it may modify in place.
	// NOTE(review): the name suggests INT8 calibration preprocessing, but no
	// declaration in this header takes it -- confirm where it is used.
	typedef std::function<void(int current, int count, cv::Mat& inputOutput)> Int8Process;

	// NOTE(review): presumably selects the active GPU by index -- confirm
	// against the definition.
	void setDevice(int device_id);

	// Which kind of model description a ModelSource carries.
	enum ModelSourceType {
		ModelSourceType_FromCaffe,
		ModelSourceType_FromONNX
	};

	// Describes where the network comes from: either a Caffe
	// prototxt + caffemodel pair or a single ONNX file.
	class ModelSource {
	public:
		// Caffe source: network definition file plus weights file.
		ModelSource(const std::string& prototxt, const std::string& caffemodel);
		// ONNX source. Not `explicit`, so a std::string converts implicitly.
		ModelSource(const std::string& onnxmodel);
		ModelSourceType type() const;
		std::string prototxt() const;
		std::string caffemodel() const;
		std::string onnxmodel() const;

	private:
		std::string prototxt_, caffemodel_;
		std::string onnxmodel_;
		ModelSourceType type_;
	};

	// Channel / height / width triple describing one network input.
	class InputDims {
	public:
		InputDims(int channels, int height, int width);

		int channels() const;
		int height() const;
		int width() const;

	private:
		int channels_, height_, width_;
	};

	// Numeric precision requested for the compiled engine.
	enum TRTMode {
		TRTMode_FP32,
		TRTMode_FP16
	};

	// Returns a C string for the given mode.
	// NOTE(review): exact text and lifetime are defined by the implementation.
	const char* modeString(TRTMode type);

	// Compiles the model described by `source` and writes the result to
	// `savepath`.
	//   mode            - requested precision
	//   outputs         - output names (NOTE(review): presumably only needed
	//                     for Caffe sources -- confirm)
	//   batchSize       - batch size passed to the builder
	//   inputsDimsSetup - optional per-input dimensions; empty by default
	// NOTE(review): the bool result presumably signals success -- confirm.
	bool compileTRT(
		TRTMode mode,
		const std::vector<std::string>& outputs,
		unsigned int batchSize,
		const ModelSource& source,
		const std::string& savepath,
		const std::vector<InputDims> inputsDimsSetup = {});
};
64 | 
65 | #endif //TRT_BUILDER_HPP


--------------------------------------------------------------------------------
/src/caffeplugin/caffeplugin.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef PLUGIN_BASE_HPP
  3 | #define PLUGIN_BASE_HPP
  4 | 
  5 | #include <memory>
  6 | #include <NvInfer.h>
  7 | #include <NvCaffeParser.h>
  8 | #include <vector>
  9 | #include <common/cc_util.hpp>
 10 | #include <common/trt_common.hpp>
 11 | #include <set>
 12 | #include <infer/trt_infer.hpp>
 13 | #include <NvInferRuntimeCommon.h>
 14 | #include <cuda_fp16.h>
 15 | 
 16 | namespace Plugin {
 17 | 
	// Stage a plugin instance is in; TRTPlugin::phase_ starts as CompilePhase.
	// NOTE(review): presumably switched to InferencePhase when a plugin is
	// restored from serialized data -- confirm against caffeplugin.cpp.
	enum Phase {
		CompilePhase,
		InferencePhase
	};
 22 | 
 23 | 	struct GTensor {
 24 | 		GTensor() {}
 25 | 		GTensor(const TRTInfer::Tensor& tensor);
 26 | 		GTensor(float* ptr, int n, int c, int h, int w);
 27 | 		GTensor(TRTInfer::halfloat* ptr, int n, int c, int h, int w);
 28 | 		int count(int start_axis = 0) const;
 29 | 		inline int offset(int n = 0, int c = 0, int h = 0, int w = 0) const { return ((n * this->channel_ + c) * this->height_ + h) * this->width_ + w; }
 30 | 
 31 | 		template<typename _T>
 32 | 		inline _T* ptr() const { return (_T*)ptr_; }
 33 | 
 34 | 		template<typename _T>
 35 | 		inline _T* ptr(int n, int c = 0, int h = 0, int w = 0) const { return (_T*)ptr_ + offset(n, c, h, w); }
 36 | 
 37 | 		inline float* ptr_float() const { return (float*)ptr_; }
 38 | 		inline float* ptr_float(int n, int c = 0, int h = 0, int w = 0) const { return (float*)ptr_ + offset(n, c, h, w); }
 39 | 		inline TRTInfer::halfloat* ptr_half() const { return (TRTInfer::halfloat*)ptr_; }
 40 | 		inline TRTInfer::halfloat* ptr_half(int n, int c = 0, int h = 0, int w = 0) const { return (TRTInfer::halfloat*)ptr_ + offset(n, c, h, w); }
 41 | 
 42 | 		int num_ = 0, channel_ = 0, height_ = 0, width_ = 0;
 43 | 		void* ptr_ = nullptr;
 44 | 		TRTInfer::DataType dtType_ = TRTInfer::DataType::dtFloat;
 45 | 	};
 46 | 
 47 | 	struct LayerConfig {
 48 | 
 49 | 		///////////////////////////////////
 50 | 		int nbOutput_ = 1;
 51 | 		size_t workspaceSize_ = 0;
 52 | 		std::set<nvinfer1::DataType> supportDataType_;
 53 | 		std::set<nvinfer1::PluginFormat> supportPluginFormat_;
 54 | 
 55 | 		std::vector<std::shared_ptr<TRTInfer::Tensor>> weights_;
 56 | 		TRTInfer::DataType configDataType_;
 57 | 		nvinfer1::PluginFormat configPluginFormat_;
 58 | 		int configMaxbatchSize_ = 0;		
 59 | 
 60 | 		///////////////////////////////////
 61 | 		std::vector<nvinfer1::Dims> input;
 62 | 		std::vector<nvinfer1::Dims> output;
 63 | 		std::string serializeData_;
 64 | 
 65 | 		LayerConfig();
 66 | 		void serialCopyTo(void* buffer);
 67 | 		int serialize();
 68 | 		void deserialize(const void* ptr, size_t length);
 69 | 		void loadWeights(const nvinfer1::Weights* weights, int nbWeights);
 70 | 		virtual void seril(ccutil::BinIO& out) {}
 71 | 		virtual void deseril(ccutil::BinIO& in) {}
 72 | 	};
 73 | 
	// Base class for the project's plugins, built on nvinfer1::IPluginExt.
	// A concrete plugin must implement the two pure virtual functions below
	// (outputDims and the GTensor-based enqueue); the remaining virtuals have
	// implementations defined elsewhere.
	class TRTPlugin : public nvinfer1::IPluginExt {
	public:

		virtual ~TRTPlugin();
		// Shape of output `index` given the input shapes. Must be implemented.
		virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) = 0;
		// Plugin computation expressed on GTensor views. Must be implemented.
		// The implementations visible in this repository return 0.
		virtual int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) = 0;

		// Two initialisation paths: from parser-supplied weights, or from
		// previously serialized data.
		void pluginInit(const std::string& name, const nvinfer1::Weights* weights, int nbWeights);
		void pluginInit(const std::string& name, const void* serialData, size_t serialLength);

		// Returns the layer configuration; subclasses override this to
		// adjust it (e.g. supportDataType_).
		virtual std::shared_ptr<LayerConfig> config(const std::string& layerName);
		virtual bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const;
		virtual void configureWithFormat(
			const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims,
			int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize);
		virtual int getNbOutputs() const;
		virtual nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims);
		virtual int initialize();
		virtual void terminate();
		virtual size_t getWorkspaceSize(int maxBatchSize) const override;
		// Raw-pointer enqueue overload.
		// NOTE(review): presumably wraps the raw pointers in GTensor views and
		// calls the GTensor overload above -- confirm in caffeplugin.cpp.
		virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream);
		virtual size_t getSerializationSize();
		virtual void serialize(void* buffer);

	private:
		void mappingToGTensor();

	protected:
		std::string layerName_;
		Phase phase_ = CompilePhase;
		std::shared_ptr<LayerConfig> config_;
		std::vector<GTensor> inputTensors_;
		std::vector<GTensor> outputTensors_;
		std::vector<GTensor> weightTensors_;
	};
109 | 
// Place inside a plugin class body. Adds two static members:
//   pattern() - returns `pattern_`, the string the plugin is registered under
//   creator() - returns a new default-constructed `class_` as a TRTPlugin
#define SETUP_PLUGIN(class_, pattern_)									  \
	static std::string pattern() {										  \
		return pattern_;												  \
	}																	  \
																		  \
	static std::shared_ptr<Plugin::TRTPlugin> creator() {					  \
		return std::shared_ptr<Plugin::TRTPlugin>(new class_());			  \
	}

// Defines a static Plugin::PluginRegister object built from the class's
// creator() and pattern(). The trailing semicolon is supplied at the use site.
#define RegisterPlugin(class_)	\
	static Plugin::PluginRegister __register##class_(class_::creator, class_::pattern())
121 | 	
122 | 	typedef std::shared_ptr<TRTPlugin>(*PluginCreater)();
123 | 
124 | 	struct PluginInfo {
125 | 		PluginCreater creater;
126 | 		std::string pattern;
127 | 	};
128 | 
129 | 	class PluginRegistry {
130 | 	public:
131 | 		virtual void addPlugin(PluginCreater creater, const std::string& pattern) = 0;
132 | 		virtual PluginInfo* findPlugin(const std::string& layerName) = 0;
133 | 	};
134 | 
135 | 	class PluginRegister {
136 | 	public:
137 | 		PluginRegister(PluginCreater creater, const std::string& pattern);
138 | 	};
139 | 
	// Plugin factory implementing both the Caffe-parser factory interface and
	// the runtime factory interface, so one class covers both phases.
	class TRTBuilderPluginFactory : public nvcaffeparser1::IPluginFactoryExt, public nvinfer1::IPluginFactory {

	public:
		// Overrides of the two factory interfaces.
		virtual bool isPluginExt(const char* layerName) override;
		virtual bool isPlugin(const char* layerName) override;
		// Creation from parser-supplied weights.
		virtual nvinfer1::IPluginExt* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override;
		// Creation from previously serialized plugin data.
		virtual nvinfer1::IPluginExt* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override;

		// NOTE(review): presumably true when a registered pattern matches
		// `layerName` -- confirm against the definition.
		virtual bool support(const std::string& layerName);

		// Project-level creation hooks taking std::string layer names.
		virtual std::shared_ptr<TRTPlugin> createPlugin(const std::string& layerName);
		virtual nvinfer1::IPluginExt* builderCreate(const std::string& layerName, const nvinfer1::Weights* weights, int nbWeights);
		virtual nvinfer1::IPluginExt* inferCreate(const std::string& layerName, const void* serialData, size_t serialLength);

	private:
		// NOTE(review): presumably keeps the created plugins alive for the
		// lifetime of the factory -- confirm against the definition.
		std::vector<std::shared_ptr<nvinfer1::IPluginExt>> plugins_;
	};
157 | 
158 | 	///////////////////////////////////////////////////////////////////////////////////////////
	// Factory accessors for the two phases, plus access to the registry.
	// All three are defined elsewhere.
	std::shared_ptr<nvcaffeparser1::IPluginFactoryExt> createPluginFactoryForBuildPhase();
	std::shared_ptr<nvinfer1::IPluginFactory> createPluginFactoryForInferPhase();
	PluginRegistry* getPluginRegistry();

// Expands to a kernel launch prefix with grid/block sizes derived from
// `numJobs`, no dynamic shared memory, on `stream`. The kernel arguments
// follow at the use site: ExecuteKernel(n, myKernel, stream)(args...).
// gridDims / blockDims are defined elsewhere.
#define ExecuteKernel(numJobs, kernel, stream)		kernel<<<gridDims(numJobs), blockDims(numJobs), 0, stream>>>
164 | }; //namespace Plugin
165 | 
166 | #endif //PLUGIN_BASE_HPP


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/ChannelMultiplicationLayer.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "ChannelMultiplicationLayer.hpp"
 4 | #include <common/json.hpp>
 5 | 
 6 | using namespace Plugin;
 7 | 
 8 | template<typename _T>
 9 | __global__ void channelMultiplicationKernel(const _T* in, const _T* muld, _T* out, int input_area, int edge)
10 | {
11 | 	KERNEL_POSITION;
12 | 
13 | 	int c = position / input_area;
14 | 	out[position] = in[position] * muld[c];
15 | }
16 | 
17 | namespace Plugin {
18 | 
19 | 	std::shared_ptr<LayerConfig> ChannelMultiplicationLayer::config(const std::string& layerName) {
20 | 		auto cfg = TRTPlugin::config(layerName);
21 | 		cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
22 | 		//cfg->supportDataType_ = {nvinfer1::DataType::kHALF};
23 | 		return cfg;
24 | 	}
25 | 
26 | 	int ChannelMultiplicationLayer::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
27 | 		auto& data = inputs[0];
28 | 		auto& mul = inputs[1];
29 | 		auto& out = outputs[0];
30 | 		int edge = data.count();
31 | 
32 | 		if (config_->configDataType_ == TRTInfer::DataType::dtFloat) {
33 | 			channelMultiplicationKernel<float> <<<gridDims(edge), blockDims(edge), 0, stream >>> (data.ptr_float(), mul.ptr_float(), out.ptr_float(), data.height_ * data.width_, edge);
34 | 		}
35 | 		else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) {
36 | 			channelMultiplicationKernel<TRTInfer::halfloat> <<<gridDims(edge), blockDims(edge), 0, stream >>> (
37 | 				(const TRTInfer::halfloat*)data.ptr_half(), (const TRTInfer::halfloat*)mul.ptr_half(), out.ptr_half(), data.height_ * data.width_, edge
38 | 			);
39 | 		}
40 | 		return 0;
41 | 	}
42 | 
43 | 	nvinfer1::Dims ChannelMultiplicationLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
44 | 		return inputDims[0];
45 | 	}
46 | }
47 | 
48 | RegisterPlugin(ChannelMultiplicationLayer);


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/ChannelMultiplicationLayer.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/ChannelMultiplicationLayer.hpp


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/ClipLayer.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "ClipLayer.hpp"
 4 | #include <common/json.hpp>
 5 | 
 6 | using namespace Plugin;
 7 | 
 8 | template<typename _T>
 9 | __global__ void clipKernel(const _T* in, _T* out, int inw, int inh, int outw, int outh, int edge)
10 | {
11 | 	KERNEL_POSITION;
12 | 
13 | 	int outHeightIndex = position;
14 | 	int selectOutInnerY = outHeightIndex % outh;
15 | 	int nc = outHeightIndex / outh;
16 | 	int inHeightIndex = nc * inh + selectOutInnerY;
17 | 	in += inHeightIndex * inw;
18 | 	out += outHeightIndex * outw;
19 | 	for (int i = 0; i < outw; ++i) 
20 | 		*out++ = *in++;
21 | }
22 | 
namespace Plugin {

	// Builds the layer configuration on top of the TRTPlugin default and
	// declares that the layer can run in both FP16 and FP32.
	std::shared_ptr<LayerConfig> ClipLayer::config(const std::string& layerName) {
		auto layerCfg = TRTPlugin::config(layerName);
		layerCfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
		return layerCfg;
	}

	// Launches clipKernel on `stream`, one thread per output row, to copy the
	// top-left region of inputs[0] into outputs[0].
	// `weights` and `workspace` are not used. Always returns 0.
	int ClipLayer::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
		const auto& source = inputs[0];
		auto& clipped = outputs[0];

		const int totalRows = clipped.num_ * clipped.channel_ * clipped.height_;
		const auto precision = config_->configDataType_;

		if (precision == TRTInfer::DataType::dtFloat) {
			clipKernel<float> <<<gridDims(totalRows), blockDims(totalRows), 0, stream >>> (
				source.ptr_float(), clipped.ptr_float(), source.width_, source.height_, clipped.width_, clipped.height_, totalRows
			);
			return 0;
		}

		if (precision == TRTInfer::DataType::dtHalfloat) {
			clipKernel<TRTInfer::halfloat> <<<gridDims(totalRows), blockDims(totalRows), 0, stream >>> (
				source.ptr_half(), clipped.ptr_half(), source.width_, source.height_, clipped.width_, clipped.height_, totalRows
			);
		}

		// Any other precision launches nothing.
		return 0;
	}

	// Output keeps d[0] of input 0 and is one smaller in d[1] and in d[2].
	nvinfer1::Dims ClipLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
		const nvinfer1::Dims& inDims = inputDims[0];
		return nvinfer1::Dims3(inDims.d[0], inDims.d[1] - 1, inDims.d[2] - 1);
	}
}

RegisterPlugin(ClipLayer);


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/ClipLayer.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/ClipLayer.hpp


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/DCNLayer.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef DCNLayer_HPP
 3 | #define DCNLayer_HPP
 4 | 
 5 | #include <caffeplugin/caffeplugin.hpp>
 6 | #include <cublas_v2.h>
 7 | 
 8 | namespace Plugin {
 9 | 	class DCNLayer : public TRTPlugin {
10 | 	public:
11 | 		SETUP_PLUGIN(DCNLayer, "DCN*");
12 | 
13 | 		DCNLayer();
14 | 		virtual ~DCNLayer();
15 | 
16 | 		virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
17 | 
18 | 		virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims);
19 | 		virtual size_t getWorkspaceSize(int maxBatchSize) const override;
20 | 		virtual int enqueue(const std::vector<Plugin::GTensor>& inputs, std::vector<Plugin::GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
21 | 	private:
22 | 		cublasHandle_t cublasHandle_ = nullptr;
23 | 	};
24 | }
25 | 
26 | #endif //DCNLayer_HPP


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/PlexShuffleLayer.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "PlexShuffleLayer.hpp"
 4 | #include <common/json.hpp>
 5 | 
 6 | using namespace Plugin;
 7 | 
 8 | template <typename _T>
 9 | __global__ void pixelShuffleKernel(
10 | 	const _T* bottom_data, _T* top_data, int input_c, int input_h,
11 | 	int input_w, int input_area, int output_h, int output_w, int output_area, int edge)
12 | {
13 | 	KERNEL_POSITION;
14 | 
15 | 	int input_c_index = position / input_area;
16 | 	int f_input_index = position % input_area;
17 | 	int input_row = f_input_index / input_w;
18 | 	int input_col = f_input_index % input_w;
19 | 	int output_c_index = input_c_index / 4;
20 | 	input_c_index = input_c_index % 4;
21 | 	int output_row = input_row * 2 + input_c_index / 2;
22 | 	int output_col = input_col * 2 + input_c_index % 2;
23 | 	int output_index = output_c_index * output_area + output_row * output_w + output_col;
24 | 	top_data[output_index] = bottom_data[position];
25 | }
26 | 
namespace Plugin {

	// Builds the layer configuration on top of the TRTPlugin default and
	// declares that the layer can run in both FP16 and FP32.
	std::shared_ptr<LayerConfig> PlexShuffleLayer::config(const std::string& layerName){
		auto layerCfg = TRTPlugin::config(layerName);
		layerCfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
		return layerCfg;
	}

	// Launches pixelShuffleKernel on `stream`, one thread per element of inputs[0].
	// The FP32 kernel is used when the configured type is dtFloat; every other
	// configured type goes through the half-precision kernel.
	// `weights` and `workspace` are not used. Always returns 0.
	int PlexShuffleLayer::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
		const auto& source = inputs[0];
		auto& shuffled = outputs[0];

		const int total = source.count();
		const int srcArea = source.height_ * source.width_;
		const int dstArea = shuffled.height_ * shuffled.width_;
		const bool useFloat = config_->configDataType_ == TRTInfer::DataType::dtFloat;

		if (useFloat) {
			pixelShuffleKernel<float> <<<gridDims(total), blockDims(total), 0, stream >>> (
				source.ptr_float(), shuffled.ptr_float(), source.channel_, source.height_, source.width_,
				srcArea, shuffled.height_, shuffled.width_, dstArea, total);
		}
		else {
			pixelShuffleKernel<TRTInfer::halfloat> <<<gridDims(total), blockDims(total), 0, stream >>> (
				source.ptr_half(), shuffled.ptr_half(), source.channel_, source.height_, source.width_,
				srcArea, shuffled.height_, shuffled.width_, dstArea, total);
		}
		return 0;
	}

	// Output has a quarter of d[0] and twice d[1] and d[2] of input 0.
	nvinfer1::Dims PlexShuffleLayer::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
		const nvinfer1::Dims& inDims = inputDims[0];
		return nvinfer1::Dims3(inDims.d[0] / 4, inDims.d[1] *  2, inDims.d[2] * 2);
	}
}

RegisterPlugin(PlexShuffleLayer);


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/PlexShuffleLayer.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/caffeplugin/plugins/PlexShuffleLayer.hpp


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/TestPlugin.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "TestPlugin.hpp"
 4 | 
 5 | typedef TRTInfer::halfloat halfloat;
 6 | 
 7 | template<typename _T>
 8 | __global__ void MyPluginKenel(_T* input, _T* output, int edge);
 9 | 
10 | template<>
11 | __global__ void MyPluginKenel(float* input, float* output, int edge) {
12 | 
13 | 	KERNEL_POSITION;
14 | 	output[position] = (input[position] < 0 ? 0 : input[position]) + 1.3f;
15 | }
16 | 
17 | template<>
18 | __global__ void MyPluginKenel(halfloat* input, halfloat* output, int edge) {
19 | 
20 | 	KERNEL_POSITION;
21 | 
22 | 	halfloat zero = 0.0f;
23 | 	halfloat add = 1.3f;
24 | 	output[position] = (input[position] < zero ? zero : input[position]) + add;
25 | }
26 | 
// The single output has exactly the shape of input 0.
nvinfer1::Dims TestPlugin::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
	const nvinfer1::Dims& firstInput = inputDims[0];
	return firstInput;
}
30 | 
// Builds the layer configuration on top of the TRTPlugin default.
std::shared_ptr<LayerConfig> TestPlugin::config(const std::string& layerName) {
	auto layerCfg = TRTPlugin::config(layerName);

	// Declare that this plugin supports both the half and the float formats.
	layerCfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
	return layerCfg;
}
39 | 
// Runs the demo kernel (output = max(input, 0) + 1.3) over every element of
// inputs[0], writing to outputs[0], in the precision selected by the layer
// configuration. `weights` and `workspace` are not used. Always returns 0.
int TestPlugin::enqueue(const std::vector<Plugin::GTensor>& inputs, std::vector<Plugin::GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {

	const int count = inputs[0].count();
	const auto grid = gridDims(count);
	const auto block = blockDims(count);

	// Launch on the stream handed in by the caller. The previous launches
	// omitted the stream argument and therefore ran on the default stream,
	// outside the ordering of the work queued on `stream`.
	if (config_->configDataType_ == TRTInfer::DataType::dtFloat) {
		MyPluginKenel <<<grid, block, 0, stream >>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count);
	}
	else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) {
		MyPluginKenel <<<grid, block, 0, stream >>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count);
	}
	return 0;
}

RegisterPlugin(TestPlugin);


--------------------------------------------------------------------------------
/src/caffeplugin/plugins/TestPlugin.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef WReLU_HPP
 3 | #define WReLU_HPP
 4 | 
 5 | #include <caffeplugin/caffeplugin.hpp>
 6 | 
 7 | using namespace Plugin;
 8 | 
 9 | class TestPlugin : public TRTPlugin {
10 | public:
11 | 	//设置插件函数，通过宏，执行插件创建函数，同时执行模式匹配名字，用来对每个层的名字做模式匹配
12 | 	//该匹配方法用的是ccutil::patternMatch，请参照这个函数
13 | 	SETUP_PLUGIN(TestPlugin, "TestPlugin*");
14 | 
15 | 	virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
16 | 
17 | 	//这个插件只有一个输出，输出的shape等于输入0的shape，因此返回input0的shape
18 | 	virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override;
19 | 
20 | 	//执行过程
21 | 	int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
22 | };
23 | 
24 | #endif //WReLU_HPP


--------------------------------------------------------------------------------
/src/common/trt_common.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "trt_common.hpp"
 4 | #include <math.h>
 5 | 
 6 | dim3 gridDims(int numJobs) {
 7 | 	int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
 8 | 	return dim3(ceil(numJobs / (float)numBlockThreads));
 9 | }
10 | 
11 | dim3 blockDims(int numJobs) {
12 | 	return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
13 | }


--------------------------------------------------------------------------------
/src/common/trt_common.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TRT_COMMON_HPP
 3 | #define TRT_COMMON_HPP
 4 | 
 5 | #include <cuda_runtime.h>
 6 | 
// Upper bound on the threads per block chosen by gridDims()/blockDims().
#define GPU_BLOCK_THREADS  512

// Prologue for 1-D kernels: declares `position` as the global thread index
// (blockDim.x * blockIdx.x + threadIdx.x) and returns from the kernel when it
// is not below `edge`. The enclosing kernel must have a variable named `edge`
// in scope, and must not declare its own `position`.
#define KERNEL_POSITION											\
	int position = (blockDim.x * blockIdx.x + threadIdx.x);		\
	if (position >= (edge)) return;

// Launch-configuration helpers for `numJobs` work items:
// gridDims gives the number of blocks, blockDims the threads per block.
dim3 gridDims(int numJobs);
dim3 blockDims(int numJobs);
14 | 
15 | #endif //TRT_COMMON_HPP


--------------------------------------------------------------------------------
/src/examples/center_net_coco2x_dcn.cpp:
--------------------------------------------------------------------------------
  1 | ﻿
  2 | #include <opencv2/opencv.hpp>
  3 | #include <cc_util.hpp>
  4 | #include "builder/trt_builder.hpp"
  5 | #include "infer/trt_infer.hpp"
  6 | #include "infer/ct_detect_backend.hpp"
  7 | 
  8 | using namespace cv;
  9 | using namespace std;
 10 | 
 11 | namespace examples {
 12 | 
 13 | 	static Rect restoreCenterNetBox(float dx, float dy, float dw, float dh, float cellx, float celly, int stride, Size netSize, Size imageSize) {
 14 | 
 15 | 		float scale = 0;
 16 | 		if (imageSize.width >= imageSize.height)
 17 | 			scale = netSize.width / (float)imageSize.width;
 18 | 		else
 19 | 			scale = netSize.height / (float)imageSize.height;
 20 | 
 21 | 		float x = ((cellx + dx - dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 22 | 		float y = ((celly + dy - dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 23 | 		float r = ((cellx + dx + dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 24 | 		float b = ((celly + dy + dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 25 | 		return Rect(Point(x, y), Point(r + 1, b + 1));
 26 | 	}
 27 | 
 28 | 	static void preprocessCenterNetImageToTensor(const Mat& image, int numIndex, const shared_ptr<TRTInfer::Tensor>& tensor) {
 29 | 
 30 | 		int outH = tensor->height();
 31 | 		int outW = tensor->width();
 32 | 		float sw = outW / (float)image.cols;
 33 | 		float sh = outH / (float)image.rows;
 34 | 		float scale = std::min(sw, sh);
 35 | 
 36 | 		Mat matrix = getRotationMatrix2D(Point2f(image.cols*0.5, image.rows*0.5), 0, scale);
 37 | 		matrix.at<double>(0, 2) -= image.cols*0.5 - outW * 0.5;
 38 | 		matrix.at<double>(1, 2) -= image.rows*0.5 - outH * 0.5;
 39 | 
 40 | 		float mean[3] = {0.40789654, 0.44719302, 0.47026115};
 41 | 		float std[3] = {0.28863828, 0.27408164, 0.27809835};
 42 | 
 43 | 		Mat outimage;
 44 | 		cv::warpAffine(image, outimage, matrix, Size(outW, outH));
 45 | 		tensor->setNormMatGPU(numIndex, outimage, mean, std);
 46 | 	}
 47 | 
 48 | 	static vector<ccutil::BBox> detectBoundingbox(const shared_ptr<TRTInfer::Engine>& boundingboxDetect_, const Mat& image, float threshold = 0.3) {
 49 | 
 50 | 		if (boundingboxDetect_ == nullptr) {
 51 | 			INFO("detectBoundingbox failure call, model is nullptr");
 52 | 			return vector<ccutil::BBox>();
 53 | 		}
 54 | 
 55 | 		preprocessCenterNetImageToTensor(image, 0, boundingboxDetect_->input());
 56 | 		boundingboxDetect_->forward();
 57 | 
 58 | 		auto outHM = boundingboxDetect_->tensor("hm");
 59 | 		auto outHMPool = boundingboxDetect_->tensor("hm_pool");
 60 | 		auto outWH = boundingboxDetect_->tensor("wh");
 61 | 		auto outXY = boundingboxDetect_->tensor("reg");
 62 | 		const int stride = 4;
 63 | 
 64 | 		vector<ccutil::BBox> bboxs;
 65 | 		Size inputSize = boundingboxDetect_->input()->size();
 66 | 		float sx = image.cols / (float)inputSize.width * stride;
 67 | 		float sy = image.rows / (float)inputSize.height * stride;
 68 | 
 69 | 		for (int class_ = 0; class_ < outHM->channel(); ++class_) {
 70 | 			for (int i = 0; i < outHM->height(); ++i) {
 71 | 				float* ohmptr = outHM->cpu<float>(0, class_, i);
 72 | 				float* ohmpoolptr = outHMPool->cpu<float>(0, class_, i);
 73 | 				for (int j = 0; j < outHM->width(); ++j) {
 74 | 					if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) {
 75 | 
 76 | 						float dx = outXY->at<float>(0, 0, i, j);
 77 | 						float dy = outXY->at<float>(0, 1, i, j);
 78 | 						float dw = outWH->at<float>(0, 0, i, j);
 79 | 						float dh = outWH->at<float>(0, 1, i, j);
 80 | 						ccutil::BBox box = restoreCenterNetBox(dx, dy, dw, dh, j, i, stride, inputSize, image.size());
 81 | 						box = box.box() & Rect(0, 0, image.cols, image.rows);
 82 | 						box.label = class_;
 83 | 						box.score = *ohmptr;
 84 | 
 85 | 						if (box.area() > 0)
 86 | 							bboxs.push_back(box);
 87 | 					}
 88 | 					++ohmptr;
 89 | 					++ohmpoolptr;
 90 | 				}
 91 | 			}
 92 | 		}
 93 | 		return bboxs;
 94 | 	}
 95 | 
 96 | 	static vector<vector<ccutil::BBox>> detectBoundingboxOptim(const shared_ptr<TRTInfer::Engine>& boundingboxDetect_, const vector<Mat>& images,
 97 | 		float threshold, int maxobjs, TRTInfer::CTDetectBackend* detectBackend) {
 98 | 
 99 | 		if (boundingboxDetect_ == nullptr) {
100 | 			INFO("detectBoundingbox failure call, model is nullptr");
101 | 			return vector<vector<ccutil::BBox>>();
102 | 		}
103 | 		boundingboxDetect_->input()->resize(images.size());
104 | 
105 | 		vector<Size> imsize;
106 | 		for (int i = 0; i < images.size(); ++i) {
107 | 			preprocessCenterNetImageToTensor(images[i], i, boundingboxDetect_->input());  //1.0 ms
108 | 			imsize.emplace_back(images[i].size());
109 | 		}
110 | 
111 | 		boundingboxDetect_->forward(false);  //41.5 ms
112 | 		auto outHM = boundingboxDetect_->tensor("hm");
113 | 		auto outHMPool = boundingboxDetect_->tensor("hm_pool");
114 | 		auto outWH = boundingboxDetect_->tensor("wh");
115 | 		auto outXY = boundingboxDetect_->tensor("reg");
116 | 		return detectBackend->forwardGPU(outHM, outHMPool, outWH, outXY, imsize, threshold, maxobjs); // 0.25 ms
117 | 	}
118 | 
119 | 	void center_net_coco2x_dcn() {
120 | 
121 | 		INFOW("onnx to trtmodel...");
122 | 
123 | 		// tensorRT 7.0 + OnnX:  Must be an explicit batchsize, that is, batchsize must be specified at compile time.
124 | 		int batchSize = 2;
125 | 		auto modelFile = ccutil::format("models/dladcnv2.fp32.b%d.trtmodel", batchSize);
126 | 		if (!ccutil::exists(modelFile)) {
127 | 
128 | 			if (!ccutil::exists("models/dladcnv2.onnx")) {
129 | 				INFOW(
130 | 					"models/dladcnv2.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dladcnv2.onnx "
131 | 					"or use centerNetDLADCNOnnX/dladcn_export_onnx.py to generate"
132 | 				);
133 | 				return;
134 | 			}
135 | 
136 | 			TRTBuilder::compileTRT(
137 | 				TRTBuilder::TRTMode_FP32, {}, batchSize,
138 | 				TRTBuilder::ModelSource("models/dladcnv2.onnx"),
139 | 				modelFile, {TRTBuilder::InputDims(3, 512, 512)}
140 | 			);
141 | 		}
142 | 
143 | 		INFO("load model: %s", modelFile.c_str());
144 | 		auto engine = TRTInfer::loadEngine(modelFile);
145 | 		if (!engine) {
146 | 			INFO("can not load model.");
147 | 			return;
148 | 		}
149 | 
150 | 		INFO("forward...");
151 | 		vector<Mat> images{
152 | 			imread("imgs/www.jpg"),
153 | 			imread("imgs/17790319373_bd19b24cfc_k.jpg")
154 | 		};
155 | 
156 | 		TRTInfer::CTDetectBackend backend(engine->getCUStream());
157 | 		auto imobjs = detectBoundingboxOptim(engine, images, 0.3, 100, &backend);  // 43.86 ms
158 | 		
159 | 		for (int j = 0; j < images.size(); ++j) {
160 | 			auto& objs = imobjs[j];
161 | 			objs = ccutil::nms(objs, 0.5);
162 | 
163 | 			INFO("objs.length = %d", objs.size());
164 | 			for (int i = 0; i < objs.size(); ++i) {
165 | 				auto& obj = objs[i];
166 | 				ccutil::drawbbox(images[j], obj);
167 | 			}
168 | 			imwrite(ccutil::format("results/%d.centernet.coco2x.dcn.jpg", j), images[j]);
169 | 		}
170 | 
171 | #ifdef _WIN32
172 | 		cv::imshow("dla dcn detect 1", images[0]);
173 | 		cv::imshow("dla dcn detect 2", images[1]);
174 | 		cv::waitKey();
175 | 		cv::destroyAllWindows();
176 | #endif
177 | 		INFO("done.");
178 | 	}
179 | };


--------------------------------------------------------------------------------
/src/examples/center_track_coco_tracking.cpp:
--------------------------------------------------------------------------------
  1 | ﻿
  2 | #include <opencv2/opencv.hpp>
  3 | #include <cc_util.hpp>
  4 | #include "builder/trt_builder.hpp"
  5 | #include "infer/trt_infer.hpp"
  6 | 
  7 | using namespace cv;
  8 | using namespace std;
  9 | 
 10 | namespace examples {
 11 | 
 12 | 	static void drawArrow(cv::Mat& img, cv::Point pStart, cv::Point pEnd, int len, int alpha,
 13 | 		cv::Scalar color, int thickness, int lineType)
 14 | 	{
 15 | 		const double PI = 3.1415926;
 16 | 		Point arrow;
 17 | 		double angle = atan2((double)(pStart.y - pEnd.y), (double)(pStart.x - pEnd.x));
 18 | 		line(img, pStart, pEnd, color, thickness, lineType);
 19 | 
 20 | 		arrow.x = pEnd.x + len * cos(angle + PI * alpha / 180);
 21 | 		arrow.y = pEnd.y + len * sin(angle + PI * alpha / 180);
 22 | 		line(img, pEnd, arrow, color, thickness, lineType);
 23 | 		arrow.x = pEnd.x + len * cos(angle - PI * alpha / 180);
 24 | 		arrow.y = pEnd.y + len * sin(angle - PI * alpha / 180);
 25 | 		line(img, pEnd, arrow, color, thickness, lineType);
 26 | 	}
 27 | 
 28 | 	static Rect restoreCenterNetBox(float dx, float dy, float dw, float dh, float cellx, float celly, int stride, Size netSize, Size imageSize) {
 29 | 
 30 | 		float scale = 0;
 31 | 		if (imageSize.width >= imageSize.height)
 32 | 			scale = netSize.width / (float)imageSize.width;
 33 | 		else
 34 | 			scale = netSize.height / (float)imageSize.height;
 35 | 
 36 | 		float x = ((cellx + dx - dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 37 | 		float y = ((celly + dy - dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 38 | 		float r = ((cellx + dx + dw * 0.5) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 39 | 		float b = ((celly + dy + dh * 0.5) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 40 | 		return Rect(Point(x, y), Point(r + 1, b + 1));
 41 | 	}
 42 | 
 43 | 	static Scalar restoreCenterTracking(float ox, float oy, float cellx, float celly, int stride, Size netSize, Size imageSize) {
 44 | 
 45 | 		float scale = 0;
 46 | 		if (imageSize.width >= imageSize.height)
 47 | 			scale = netSize.width / (float)imageSize.width;
 48 | 		else
 49 | 			scale = netSize.height / (float)imageSize.height;
 50 | 
 51 | 		float x = ((cellx + ox) * stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 52 | 		float y = ((celly + oy) * stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 53 | 		float x0 = ((cellx)* stride - netSize.width * 0.5) / scale + imageSize.width * 0.5;
 54 | 		float y0 = ((celly)* stride - netSize.height * 0.5) / scale + imageSize.height * 0.5;
 55 | 		return Scalar(x0, y0, x, y);
 56 | 	}
 57 | 
 58 | 	static void preprocessCenterNetImageToTensor(const Mat& image, int numIndex, const shared_ptr<TRTInfer::Tensor>& tensor) {
 59 | 
 60 | 		int outH = tensor->height();
 61 | 		int outW = tensor->width();
 62 | 		float sw = outW / (float)image.cols;
 63 | 		float sh = outH / (float)image.rows;
 64 | 		float scale = std::min(sw, sh);
 65 | 
 66 | 		Mat matrix = getRotationMatrix2D(Point2f(image.cols*0.5, image.rows*0.5), 0, scale);
 67 | 		matrix.at<double>(0, 2) -= image.cols*0.5 - outW * 0.5;
 68 | 		matrix.at<double>(1, 2) -= image.rows*0.5 - outH * 0.5;
 69 | 
 70 | 		float mean[3] = {0.40789654, 0.44719302, 0.47026115};
 71 | 		float std[3] = {0.28863828, 0.27408164, 0.27809835};
 72 | 
 73 | 		Mat outimage;
 74 | 		cv::warpAffine(image, outimage, matrix, Size(outW, outH));
 75 | 		tensor->setNormMatGPU(numIndex, outimage, mean, std);
 76 | 	}
 77 | 
 78 | 	static vector<tuple<ccutil::BBox, Scalar>> detectBoundingboxAndTracking(const shared_ptr<TRTInfer::Engine>& boundingboxAndTrackingDetect_, const Mat& image, const Mat& prevImage, float threshold = 0.3) {
 79 | 
 80 | 		if (boundingboxAndTrackingDetect_ == nullptr) {
 81 | 			INFO("detectBoundingbox failure call, model is nullptr");
 82 | 			return vector<tuple<ccutil::BBox, Scalar>>();
 83 | 		}
 84 | 
 85 | 		preprocessCenterNetImageToTensor(image, 0, boundingboxAndTrackingDetect_->input(0));
 86 | 		preprocessCenterNetImageToTensor(prevImage, 0, boundingboxAndTrackingDetect_->input(1));
 87 | 		boundingboxAndTrackingDetect_->forward();
 88 | 		auto outHM = boundingboxAndTrackingDetect_->tensor("hm");
 89 | 		auto outHMPool = boundingboxAndTrackingDetect_->tensor("hm_pool");
 90 | 		auto outWH = boundingboxAndTrackingDetect_->tensor("wh");
 91 | 		auto outXY = boundingboxAndTrackingDetect_->tensor("reg");
 92 | 		auto outTracking = boundingboxAndTrackingDetect_->tensor("tracking");
 93 | 		const int stride = 4;
 94 | 
 95 | 		vector<tuple<ccutil::BBox, Scalar>> bboxs;
 96 | 		Size inputSize = boundingboxAndTrackingDetect_->input()->size();
 97 | 		float sx = image.cols / (float)inputSize.width * stride;
 98 | 		float sy = image.rows / (float)inputSize.height * stride;
 99 | 
100 | 		for (int class_ = 0; class_ < outHM->channel(); ++class_) {
101 | 			for (int i = 0; i < outHM->height(); ++i) {
102 | 				float* ohmptr = outHM->cpu<float>(0, class_, i);
103 | 				float* ohmpoolptr = outHMPool->cpu<float>(0, class_, i);
104 | 				for (int j = 0; j < outHM->width(); ++j) {
105 | 					if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) {
106 | 
107 | 						float dx = outXY->at<float>(0, 0, i, j);
108 | 						float dy = outXY->at<float>(0, 1, i, j);
109 | 						float dw = outWH->at<float>(0, 0, i, j);
110 | 						float dh = outWH->at<float>(0, 1, i, j);
111 | 						float ox = outTracking->at<float>(0, 0, i, j);
112 | 						float oy = outTracking->at<float>(0, 1, i, j);
113 | 						ccutil::BBox box = restoreCenterNetBox(dx, dy, dw, dh, j, i, stride, inputSize, image.size());
114 | 						auto offset = restoreCenterTracking(ox, oy, j, i, stride, inputSize, image.size());
115 | 						box = box.box() & Rect(0, 0, image.cols, image.rows);
116 | 						box.label = class_;
117 | 						box.score = *ohmptr;
118 | 
119 | 						if (box.area() > 0)
120 | 							bboxs.push_back(make_tuple(box, offset));
121 | 					}
122 | 					++ohmptr;
123 | 					++ohmpoolptr;
124 | 				}
125 | 			}
126 | 		}
127 | 		return bboxs;
128 | 	}
129 | 
130 | 	void center_track_coco_tracking() {
131 | 		INFOW("onnx to trtmodel...");
132 | 
133 | 		if (!ccutil::exists("models/coco_tracking.fp32.trtmodel")) {
134 | 
135 | 			if (!ccutil::exists("models/coco_tracking.onnx")) {
136 | 
137 | 				INFOW(
138 | 					"models/coco_tracking.onnx not found, download url: http://zifuture.com:1000/fs/public_models/coco_tracking.onnx"
139 | 				);
140 | 				return;
141 | 			}
142 | 
143 | 			TRTBuilder::compileTRT(
144 | 				TRTBuilder::TRTMode_FP32, {}, 1,
145 | 				TRTBuilder::ModelSource("models/coco_tracking.onnx"),
146 | 				"models/coco_tracking.fp32.trtmodel", 
147 | 				{TRTBuilder::InputDims(3, 512, 512), TRTBuilder::InputDims(3, 512, 512)}
148 | 			);
149 | 		}
150 | 
151 | 		INFO("load model: models/coco_tracking.fp32.trtmodel");
152 | 		auto engine = TRTInfer::loadEngine("models/coco_tracking.fp32.trtmodel");
153 | 		if (!engine) {
154 | 			INFO("can not load model.");
155 | 			return;
156 | 		}
157 | 
158 | 		INFO("forward...");
159 | 		Mat prevImage = imread("imgs/000020.jpg");
160 | 		Mat image = imread("imgs/000023.jpg");
161 | 
162 | 		auto objs = detectBoundingboxAndTracking(engine, image, prevImage, 0.35);
163 | 
164 | 		INFO("objs.length = %d", objs.size());
165 | 		for (int i = 0; i < objs.size(); ++i) {
166 | 			auto& obj = objs[i];
167 | 			auto& box = get<0>(obj);
168 | 			auto& offset = get<1>(obj);
169 | 			ccutil::drawbbox(image, box, ccutil::DrawType::NoName);
170 | 			drawArrow(image, Point(offset[0], offset[1]), Point(offset[2], offset[3]), 10, 35, Scalar(0, 255, 0), 2, 16);
171 | 
172 | 			ccutil::drawbbox(prevImage, box, ccutil::DrawType::NoName);
173 | 			drawArrow(prevImage, Point(offset[0], offset[1]), Point(offset[2], offset[3]), 10, 35, Scalar(0, 255, 0), 2, 16);
174 | 		}
175 | 
176 | 		imwrite("results/coco.tracking.jpg", image);
177 | 
178 | #ifdef _WIN32
179 | 		cv::imshow("coco.tracking.current", image);
180 | 		cv::imshow("coco.tracking.prev", prevImage);
181 | 		cv::waitKey();
182 | 		cv::destroyAllWindows();
183 | #endif
184 | 		INFO("done.");
185 | 	}
186 | };


--------------------------------------------------------------------------------
/src/examples/dbface.cpp:
--------------------------------------------------------------------------------
  1 | ﻿
  2 | #include <opencv2/opencv.hpp>
  3 | #include <cc_util.hpp>
  4 | #include "builder/trt_builder.hpp"
  5 | #include "infer/trt_infer.hpp"
  6 | #include "infer/dbface_backend.hpp"
  7 | 
  8 | using namespace cv;
  9 | using namespace std;
 10 | 
 11 | namespace examples {
 12 | 
	// Odd, sign-preserving exponential:
	//   |value| < 1   ->  value * e          (linear through the origin)
	//   value >= 1    ->  exp(value)
	//   value <= -1   -> -exp(-value)
	static float commonExp(float value) {

		const float gate = 1;

		// Inside the gate the curve is the straight line with slope exp(gate).
		if (fabs(value) < gate) {
			const float base = exp(gate);
			return value * base;
		}

		// Outside the gate mirror the exponential for negative inputs.
		return value > 0 ? exp(value) : -exp(-value);
	}
 27 | 
 28 | 	static vector<TRTInfer::FaceBox> detectDBFace(const shared_ptr<TRTInfer::Engine>& dbfaceDetect_, const Mat& image, float threshold = 0.3) {
 29 | 
 30 | 		Assert(image.cols % 32 == 0 || image.rows % 32 == 0);
 31 | 
 32 | 		float mean[3] = {0.408, 0.447, 0.47};
 33 | 		float std[3] = {0.289, 0.274, 0.278};
 34 | 
 35 | 		//dbfaceDetect_->input()->setNormMat(0, image, mean, std);  // 20 ms
 36 | 		dbfaceDetect_->input()->setNormMatGPU(0, image, mean, std);		// 5 ms
 37 | 		dbfaceDetect_->forward();
 38 | 		auto outHM = dbfaceDetect_->tensor("hm");
 39 | 		auto outHMPool = dbfaceDetect_->tensor("pool_hm");
 40 | 		auto outTLRB = dbfaceDetect_->tensor("tlrb");
 41 | 		auto outLandmark = dbfaceDetect_->tensor("landmark");
 42 | 		const int stride = 4;
 43 | 
 44 | 		vector<TRTInfer::FaceBox> bboxs;
 45 | 		Size inputSize = dbfaceDetect_->input()->size();
 46 | 		float sx = image.cols / (float)inputSize.width * stride;
 47 | 		float sy = image.rows / (float)inputSize.height * stride;
 48 | 
 49 | 		for (int class_ = 0; class_ < outHM->channel(); ++class_) {
 50 | 			for (int i = 0; i < outHM->height(); ++i) {
 51 | 				float* ohmptr = outHM->cpu<float>(0, class_, i);
 52 | 				float* ohmpoolptr = outHMPool->cpu<float>(0, class_, i);
 53 | 				for (int j = 0; j < outHM->width(); ++j) {
 54 | 					if (*ohmptr == *ohmpoolptr && *ohmpoolptr > threshold) {
 55 | 
 56 | 						float dx = outTLRB->at<float>(0, 0, i, j);
 57 | 						float dy = outTLRB->at<float>(0, 1, i, j);
 58 | 						float dr = outTLRB->at<float>(0, 2, i, j);
 59 | 						float db = outTLRB->at<float>(0, 3, i, j);
 60 | 						float cx = j;
 61 | 						float cy = i;
 62 | 						float x = (cx - dx) * stride;
 63 | 						float y = (cy - dy) * stride;
 64 | 						float r = (cx + dr) * stride;
 65 | 						float b = (cy + db) * stride;
 66 | 
 67 | 						TRTInfer::FaceBox box(ccutil::BBox(x, y, r, b, *ohmptr, class_));
 68 | 						if (box.area() > 0) {
 69 | 
 70 | 							for (int k = 0; k < 5; ++k) {
 71 | 								float landmark_x = outLandmark->at<float>(0, k, i, j) * 4;
 72 | 								float landmark_y = outLandmark->at<float>(0, k + 5, i, j) * 4;
 73 | 								landmark_x = (commonExp(landmark_x) + cx) * stride;
 74 | 								landmark_y = (commonExp(landmark_y) + cy) * stride;
 75 | 								box.landmark[k] = Point2f(landmark_x, landmark_y);
 76 | 							}
 77 | 							bboxs.push_back(box);
 78 | 						}
 79 | 					}
 80 | 					++ohmptr;
 81 | 					++ohmpoolptr;
 82 | 				}
 83 | 			}
 84 | 		}
 85 | 		return bboxs;
 86 | 	}
 87 | 
 88 | 	static vector<TRTInfer::FaceBox> detectDBFaceOptim(const shared_ptr<TRTInfer::Engine>& dbfaceDetect_, const Mat& image, float threshold, TRTInfer::DBFaceBackend* backend) {
 89 | 
 90 | 		Assert(image.cols % 32 == 0 || image.rows % 32 == 0);
 91 | 
 92 | 		float mean[3] = {0.408, 0.447, 0.47};
 93 | 		float std[3] = {0.289, 0.274, 0.278};
 94 | 
 95 | 		dbfaceDetect_->input()->setNormMatGPU(0, image, mean, std);		// 5 ms
 96 | 		dbfaceDetect_->forward(false);
 97 | 		auto outHM = dbfaceDetect_->tensor("hm");
 98 | 		auto outHMPool = dbfaceDetect_->tensor("pool_hm");
 99 | 		auto outTLRB = dbfaceDetect_->tensor("tlrb");
100 | 		auto outLandmark = dbfaceDetect_->tensor("landmark");
101 | 		const int stride = 4;
102 | 		return backend->forwardGPU(outHM, outHMPool, outTLRB, outLandmark, threshold, 1000)[0]; // 0.25 ms
103 | 	}
104 | 
105 | 	static Mat padImage(const Mat& image, int stride = 32) {
106 | 
107 | 		int w = image.cols;
108 | 		if (image.cols % stride != 0)
109 | 			w = image.cols + (stride - (image.cols % stride));
110 | 
111 | 		int h = image.rows;
112 | 		if (image.rows % stride != 0)
113 | 			h = image.rows + (stride - (image.rows % stride));
114 | 
115 | 		if (Size(w, h) == image.size())
116 | 			return image;
117 | 
118 | 		Mat output(h, w, image.type(), Scalar(0));
119 | 		image.copyTo(output(Rect(0, 0, image.cols, image.rows)));
120 | 		return output;
121 | 	}
122 | 
123 | 	void dbface() {
124 | 
125 | 		Mat image = imread("imgs/selfie.jpg");
126 | 		if (image.empty()) {
127 | 			INFOW("image load fail");
128 | 			return;
129 | 		}
130 | 
131 | 		Mat padimage = padImage(image);
132 | 		int maxBatchSize = 1;
133 | 		string modelPath = ccutil::format("models/dbface.%dx%d.fp32.b%d.trtmodel", padimage.cols, padimage.rows, maxBatchSize);
134 | 
135 | 		if (!ccutil::exists(modelPath)) {
136 | 
137 | 			if (!ccutil::exists("models/dbface.onnx")) {
138 | 				INFOW(
139 | 					"models/dbface.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dbface.onnx"
140 | 				);
141 | 				return;
142 | 			}
143 | 
144 | 			TRTBuilder::compileTRT(
145 | 				TRTBuilder::TRTMode_FP32, {}, maxBatchSize,
146 | 				TRTBuilder::ModelSource("models/dbface.onnx"),
147 | 				modelPath, 
148 | 				{TRTBuilder::InputDims(3, padimage.rows, padimage.cols)}
149 | 			);
150 | 		}
151 | 
152 | 		INFO("load model: %s", modelPath.c_str());
153 | 		auto engine = TRTInfer::loadEngine(modelPath);
154 | 		if (!engine) {
155 | 			INFO("can not load model: %s", modelPath.c_str());
156 | 			return;
157 | 		}
158 | 
159 | 		INFO("forward...");
160 | 		TRTInfer::DBFaceBackend backend(engine->getCUStream());
161 | 		auto objs = detectDBFaceOptim(engine, image, 0.25, &backend);
162 | 		
163 | 		INFO("objs.length = %d", objs.size());
164 | 		for (int i = 0; i < objs.size(); ++i) {
165 | 			auto& obj = objs[i];
166 | 			ccutil::drawbbox(image, obj, ccutil::DrawType::Empty);
167 | 
168 | 			for (int k = 0; k < 5; ++k) {
169 | 				cv::circle(image, obj.landmark[k], 3, Scalar(0, 0, 255), -1, 16);
170 | 			}
171 | 		}
172 | 
173 | 		imwrite("results/selfie.draw.jpg", image);
174 | 
175 | #ifdef _WIN32
176 | 		cv::imshow("dbface selfie detect", image);
177 | 		cv::waitKey();
178 | 		cv::destroyAllWindows();
179 | #endif
180 | 		INFO("done.");
181 | 	}
182 | };


--------------------------------------------------------------------------------
/src/examples/onnx.cpp:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | #include <opencv2/opencv.hpp>
 3 | #include <cc_util.hpp>
 4 | #include "builder/trt_builder.hpp"
 5 | #include "infer/trt_infer.hpp"
 6 | 
 7 | using namespace cv;
 8 | using namespace std;
 9 | 
10 | namespace examples {
11 | 
12 | 	void onnx() {
13 | 
14 | 		if (!ccutil::exists("models/demo.onnx")) {
15 | 			INFOE("models/demo.onnx not exists, run< python plugin_onnx_export.py > generate demo.onnx.");
16 | 			return;
17 | 		}
18 | 
19 | 		INFOW("onnx to trtmodel...");
20 | 		TRTBuilder::compileTRT(
21 | 			TRTBuilder::TRTMode_FP32, {}, 4,
22 | 			TRTBuilder::ModelSource("models/demo.onnx"),
23 | 			"models/demo.fp32.trtmodel", 
24 | 			{TRTBuilder::InputDims(3, 5, 5), TRTBuilder::InputDims(3, 5, 5)}
25 | 		);
26 | 		INFO("done.");
27 | 
28 | 		INFO("load model: models/demo.fp32.trtmodel");
29 | 		auto engine = TRTInfer::loadEngine("models/demo.fp32.trtmodel");
30 | 		if (!engine) {
31 | 			INFO("can not load model.");
32 | 			return;
33 | 		}
34 | 
35 | 		INFO("forward...");
36 | 
37 | 		engine->input(0)->setTo(0.25);
38 | 		engine->input(1)->setTo(0);
39 | 		engine->forward();
40 | 		auto output = engine->output(0);
41 | 		output->print();
42 | 		INFO("done.");
43 | 	}
44 | };


--------------------------------------------------------------------------------
/src/import_lib.cpp:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | 
 3 | //导入OpenCV，根据编译情况选择不同库
 4 | #if defined(_DEBUG)
 5 | #	pragma comment(lib, "opencv_world346d.lib")
 6 | #else
 7 | #	pragma comment(lib, "opencv_world346.lib")
 8 | #endif
 9 | 
10 | //导入cuda
11 | #pragma comment(lib, "cuda.lib")
12 | #pragma comment(lib, "cudart.lib")
13 | #pragma comment(lib, "cublas.lib")
14 | #pragma comment(lib, "cudnn.lib")
15 | 
16 | //导入tensorRT
17 | #pragma comment(lib, "nvinfer.lib")
18 | #pragma comment(lib, "nvinfer_plugin.lib")
19 | #pragma comment(lib, "nvparsers.lib")
20 | 
21 | #if defined(_DEBUG)
22 | #pragma comment(lib, "libprotobufd.lib")
23 | #else
24 | #pragma comment(lib, "libprotobuf.lib")
25 | #endif
26 | //#pragma comment(lib, "nvonnxparser.lib")


--------------------------------------------------------------------------------
/src/infer/ct_detect_backend.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #include "ct_detect_backend.hpp"
  4 | #include <cc_util.hpp>
  5 | #include <common/trt_common.hpp>
  6 | 
  7 | namespace TRTInfer {
  8 | 
  9 | 	CTDetectBackend::CTDetectBackend(CUStream stream) :Backend(stream){}
 10 | 
	// Decodes heat-map peaks into boxes, one thread per heat-map element.
	//   hm / hmpool : scores and their pooled counterpart; an element is kept when both are equal
	//                 and the score reaches `threshold`
	//   wh / reg    : two planes each (x plane followed by y plane, w_x_h elements apart)
	//   countptr    : global counter used to hand out output slots
	//   boxptr      : output array with room for `maxobjs` boxes
	//   edge        : not referenced in this body; presumably consumed by KERNEL_POSITION as the
	//                 bounds check for `position` -- TODO confirm against the macro definition
	static __global__ void CTDetectBackend_forwardGPU(float* hm, float* hmpool, float* wh, float* reg, int* countptr, ccutil::BBox* boxptr, int width, int height, int w_x_h, int channels, int stride, float threshold,
		int maxobjs, int imageWidth, int imageHeight, float scale, int edge) {

		KERNEL_POSITION;

		const float score = hm[position];
		const bool isPeak = (score == hmpool[position]);
		if (!isPeak || score < threshold)
			return;

		// The counter keeps growing past maxobjs; threads that draw a slot beyond the buffer just quit.
		const int slot = atomicAdd(countptr, 1);
		if (slot >= maxobjs)
			return;

		// position = class * (w*h) + cell, cell = cy * width + cx
		const int cls = position / w_x_h;
		const int cell = position - cls * w_x_h;
		const int cellNext = cell + w_x_h;
		const int cx = cell % width;
		const int cy = cell / width;

		const float dx = reg[cell];
		const float dy = reg[cellNext];
		const float dw = wh[cell];
		const float dh = wh[cellNext];

		// Same arithmetic as before, with the shared sub-expressions named. The 0.5 literals are
		// doubles, so everything after the first float sum is evaluated in double precision.
		const float px = cx + dx;
		const float py = cy + dy;
		const double halfW = dw * 0.5;
		const double halfH = dh * 0.5;
		const double mapCx = width * 0.5;
		const double mapCy = height * 0.5;
		const double imgCx = imageWidth * 0.5;
		const double imgCy = imageHeight * 0.5;

		ccutil::BBox& box = boxptr[slot];
		box.x = ((px - halfW - mapCx) * stride) / scale + imgCx;
		box.y = ((py - halfH - mapCy) * stride) / scale + imgCy;
		box.r = ((px + halfW - mapCx) * stride) / scale + imgCx;
		box.b = ((py + halfH - mapCy) * stride) / scale + imgCy;
		box.score = score;
		box.label = cls;
	}
 45 | 
	/**
	 * Decodes the network outputs of every batch item into boxes.
	 *
	 * For each batch item a [int count | maxobjs * BBox] record is filled on the GPU and copied to the
	 * host; all items are queued on the backend stream and awaited once.
	 *
	 * @param hm, hmpool  heat map and its pooled counterpart
	 * @param wh, reg     size and offset planes
	 * @param imageSize   original image size per batch item; needs at least hm->num() entries
	 * @param threshold   minimum score
	 * @param maxobjs     capacity per batch item, must not be negative
	 * @return reference to an internal buffer, overwritten by the next call
	 */
	const std::vector<std::vector<ccutil::BBox>>& CTDetectBackend::forwardGPU(std::shared_ptr<Tensor> hm, std::shared_ptr<Tensor> hmpool, std::shared_ptr<Tensor> wh, 
		std::shared_ptr<Tensor> reg, const std::vector<cv::Size>& imageSize, float threshold, int maxobjs) {

		int count = hm->count(1);  // w * h * c
		int width = hm->width();
		int height = hm->height();
		int batchSize = hm->num();
		int channels = hm->channel();
		int stride = 4;

		// A negative capacity would wrap around in the size computation below.
		Assert(maxobjs >= 0);
		// imageSize is indexed with every batch index; reject short vectors instead of reading out of bounds.
		Assert(imageSize.size() >= (size_t)batchSize);

		auto grid = gridDims(count);
		auto block = blockDims(count);

		size_t objsStoreSize = maxobjs * sizeof(ccutil::BBox) + sizeof(int);
		int heatmapArea = width * height;
		void* cpuPtr = getCPUMemory(objsStoreSize * batchSize);
		char* cpuPtrInput = (char*)cpuPtr;
		void* gpuPtr = getGPUMemory(objsStoreSize * batchSize);
		char* gpuPtrInput = (char*)gpuPtr;
		auto stream = getStream();

		for (int i = 0; i < batchSize; ++i) {

			// Smaller of the two network/image ratios, passed to the kernel as `scale`.
			auto& imsize = imageSize[i];
			float sw = width * stride / (float)imsize.width;
			float sh = height * stride / (float)imsize.height;
			float scale = std::min(sw, sh);

			float* hm_ptr = hm->gpu<float>(i);
			float* hm_pool_ptr = hmpool->gpu<float>(i);
			float* wh_ptr = wh->gpu<float>(i);
			float* reg_ptr = reg->gpu<float>(i);

			// Record layout: counter first, boxes right behind it.
			int* countPtr = (int*)gpuPtrInput;
			ccutil::BBox* boxPtr = (ccutil::BBox*)((char*)gpuPtrInput + sizeof(int));

			cudaMemsetAsync(gpuPtrInput, 0, sizeof(int), stream);
			CTDetectBackend_forwardGPU <<< grid, block, 0, stream >>> (hm_ptr, hm_pool_ptr, wh_ptr, reg_ptr, countPtr, boxPtr,
				width, height, heatmapArea, channels, stride, threshold, maxobjs, imsize.width, imsize.height, scale, count);

			cudaMemcpyAsync(cpuPtrInput, gpuPtrInput, objsStoreSize, cudaMemcpyKind::cudaMemcpyDeviceToHost, stream);
			
			cpuPtrInput += objsStoreSize;
			gpuPtrInput += objsStoreSize;
		}

		// If any queued operation failed, the host buffer holds stale or uninitialised data;
		// stop here rather than interpreting it as counts and boxes.
		Assert(cudaStreamSynchronize(stream) == cudaSuccess);

		cpuPtrInput = (char*)cpuPtr;
		outputs_.resize(batchSize);

		for (int i = 0; i < batchSize; ++i, cpuPtrInput += objsStoreSize) {
			auto& output = outputs_[i];
			output.clear();

			// The device counter may exceed the capacity; only the first maxobjs slots were written.
			int num = *((int*)cpuPtrInput);
			num = std::min(num, maxobjs);

			if (num <= 0) 
				continue;

			ccutil::BBox* ptr = (ccutil::BBox*)(cpuPtrInput + sizeof(int));
			output.insert(output.begin(), ptr, ptr + num);
		}
		return outputs_;
	}
110 | };


--------------------------------------------------------------------------------
/src/infer/ct_detect_backend.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef CT_DETECT_BACKEND_HPP
 3 | #define CT_DETECT_BACKEND_HPP
 4 | 
 5 | #include <cc_util.hpp>
 6 | #include <opencv2/opencv.hpp>
 7 | #include <vector>
 8 | #include "trt_backend.hpp"
 9 | 
10 | namespace TRTInfer {
11 | 
12 | 	class CTDetectBackend : public Backend{
13 | 	public:
14 | 		CTDetectBackend(CUStream stream = nullptr);
15 | 
16 | 		const std::vector<std::vector<ccutil::BBox>>& forwardGPU(std::shared_ptr<Tensor> hm, std::shared_ptr<Tensor> hmpool, std::shared_ptr<Tensor> wh, std::shared_ptr<Tensor> reg,
17 | 			const std::vector<cv::Size>& imageSize, float threshold, int maxobjs);
18 | 
19 | 	private:
20 | 		std::vector<std::vector<ccutil::BBox>> outputs_;
21 | 	};
22 | };
23 | 
24 | #endif  // CT_DETECT_BACKEND_HPP


--------------------------------------------------------------------------------
/src/infer/dbface_backend.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #include "dbface_backend.hpp"
  4 | #include <cc_util.hpp>
  5 | #include <common/trt_common.hpp>
  6 | 
  7 | namespace TRTInfer {
  8 | 
  9 | 	DBFaceBackend::DBFaceBackend(CUStream stream):Backend(stream){}
 10 | 
	// Piecewise mapping used for the landmark offsets:
	//   |value| < 1  ->  value * e          (linear part)
	//   value  > 0   ->  exp(value)
	//   otherwise    -> -exp(-value)
	static __device__ float commonExp(float value) {

		const float gate = 1.0f;
		const bool insideGate = fabs(value) < gate;
		if (insideGate)
			return value * exp(gate);

		return (value > 0) ? exp(value) : -exp(-value);
	}
 22 | 
	// Decodes heat-map peaks into face boxes with five landmarks, one thread per heat-map element.
	//   hm / hmpool : scores and their pooled counterpart; an element is kept when both are equal
	//                 and the score reaches `threshold`
	//   tlrb        : four planes (left, top, right, bottom distances), w_x_h elements apart
	//   landmark    : ten planes, five x planes followed by five y planes
	//   countptr    : global counter used to hand out output slots
	//   edge        : not referenced in this body; presumably consumed by KERNEL_POSITION as the
	//                 bounds check for `position` -- TODO confirm against the macro definition
	// NOTE(review): `position` is used directly as the cell index, which looks like it assumes a
	// single-channel heat map -- confirm.
	static __global__ void DBFaceBackend_forwardGPU(float* hm, float* hmpool, float* tlrb, float* landmark, int* countptr, FaceBox* boxptr,
		int width, int height, int w_x_h, int stride, float threshold,
		int maxobjs, int edge) {

		KERNEL_POSITION;

		const float score = hm[position];
		const bool isPeak = (score == hmpool[position]);
		if (!isPeak || score < threshold)
			return;

		// The counter keeps growing past maxobjs; threads that draw a slot beyond the buffer just quit.
		const int slot = atomicAdd(countptr, 1);
		if (slot >= maxobjs)
			return;

		const int cx = position % width;
		const int cy = position / width;

		const float left = tlrb[position];
		const float top = tlrb[position + w_x_h];
		const float right = tlrb[position + w_x_h * 2];
		const float bottom = tlrb[position + w_x_h * 3];

		FaceBox& face = boxptr[slot];
		face.x = (cx - left) * stride;
		face.y = (cy - top) * stride;
		face.r = (cx + right) * stride;
		face.b = (cy + bottom) * stride;
		face.score = score;
		face.label = 0;

		for (int k = 0; k < 5; ++k) {
			// plane k holds x of landmark k, plane k + 5 holds its y
			const float rawX = landmark[position + w_x_h * k] * 4;
			const float rawY = landmark[position + w_x_h * (k + 5)] * 4;

			face.landmark[k].x = (commonExp(rawX) + cx) * stride;
			face.landmark[k].y = (commonExp(rawY) + cy) * stride;
		}
	}
 67 | 
	/**
	 * Decodes the DBFace outputs of every batch item into face boxes.
	 *
	 * For each batch item a [int count | maxobjs * FaceBox] record is filled on the GPU and copied to
	 * the host; all items are queued on the backend stream and awaited once.
	 *
	 * @param hm, hmpool  heat map and its pooled counterpart
	 * @param tlrb        box side distances
	 * @param landmark    landmark offsets
	 * @param threshold   minimum score
	 * @param maxobjs     capacity per batch item, must not be negative
	 * @return reference to an internal buffer, overwritten by the next call
	 */
	const std::vector<std::vector<FaceBox>>& DBFaceBackend::forwardGPU(std::shared_ptr<Tensor> hm, std::shared_ptr<Tensor> hmpool, std::shared_ptr<Tensor> tlrb, 
		std::shared_ptr<Tensor> landmark, float threshold, int maxobjs) {

		// A negative capacity would wrap around in the size computation below.
		Assert(maxobjs >= 0);

		int width = hm->width();
		int height = hm->height();
		int batchSize = hm->num();
		int count = hm->count(1);	// c * h * w
		auto grid = gridDims(count);
		auto block = blockDims(count);

		size_t objsStoreSize = maxobjs * sizeof(FaceBox) + sizeof(int);
		int heatmapArea = width * height;
		void* cpuPtr = getCPUMemory(objsStoreSize * batchSize);
		char* cpuPtrInput = (char*)cpuPtr;
		void* gpuPtr = getGPUMemory(objsStoreSize * batchSize);
		char* gpuPtrInput = (char*)gpuPtr;
		int stride = 4;
		auto stream = getStream();

		for (int i = 0; i < batchSize; ++i) {

			float* hm_ptr = hm->gpu<float>(i);
			float* hm_pool_ptr = hmpool->gpu<float>(i);
			float* tlrb_ptr = tlrb->gpu<float>(i);
			float* landmark_ptr = landmark->gpu<float>(i);

			// Record layout: counter first, boxes right behind it.
			int* countPtr = (int*)gpuPtrInput;
			FaceBox* boxPtr = (FaceBox*)((char*)gpuPtrInput + sizeof(int));

			cudaMemsetAsync(gpuPtrInput, 0, sizeof(int), stream);
			DBFaceBackend_forwardGPU <<< grid, block, 0, stream >>> (hm_ptr, hm_pool_ptr, tlrb_ptr, landmark_ptr, countPtr, boxPtr,
				width, height, heatmapArea, stride, threshold, maxobjs, count);

			cudaMemcpyAsync(cpuPtrInput, gpuPtrInput, objsStoreSize, cudaMemcpyKind::cudaMemcpyDeviceToHost, stream);
			cpuPtrInput += objsStoreSize;
			gpuPtrInput += objsStoreSize;
		}

		// If any queued operation failed, the host buffer holds stale or uninitialised data;
		// stop here rather than interpreting it as counts and boxes.
		Assert(cudaStreamSynchronize(stream) == cudaSuccess);

		cpuPtrInput = (char*)cpuPtr;
		outputs_.resize(batchSize);

		for (int i = 0; i < batchSize; ++i, cpuPtrInput += objsStoreSize) {
			auto& output = outputs_[i];
			output.clear();

			// The device counter may exceed the capacity; only the first maxobjs slots were written.
			int num = *((int*)cpuPtrInput);
			num = std::min(num, maxobjs);

			if (num <= 0) 
				continue;

			FaceBox* ptr = (FaceBox*)(cpuPtrInput + sizeof(int));
			output.insert(output.begin(), ptr, ptr + num);
		}
		return outputs_;
	}
125 | };


--------------------------------------------------------------------------------
/src/infer/dbface_backend.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef DBFACE_BACKEND_HPP
 3 | #define DBFACE_BACKEND_HPP
 4 | 
 5 | #include <cc_util.hpp>
 6 | #include <opencv2/opencv.hpp>
 7 | #include <vector>
 8 | #include "trt_backend.hpp"
 9 | 
10 | namespace TRTInfer {
11 | 
12 | 	struct FaceBox : ccutil::BBox {
13 | 		cv::Point2f landmark[5];
14 | 
15 | 		FaceBox() {}
16 | 		FaceBox(const ccutil::BBox& other):ccutil::BBox(other) {}
17 | 	};
18 | 
19 | 	class DBFaceBackend : public Backend{
20 | 	public:
21 | 		DBFaceBackend(CUStream stream = nullptr);
22 | 
23 | 		const std::vector<std::vector<FaceBox>>& forwardGPU(
24 | 			std::shared_ptr<Tensor> hm, std::shared_ptr<Tensor> hmpool, std::shared_ptr<Tensor> tlrb, std::shared_ptr<Tensor> landmark,
25 | 			float threshold, int maxobjs = 100);
26 | 
27 | 	private:
28 | 		std::vector<std::vector<FaceBox>> outputs_;
29 | 	};
30 | };
31 | 
32 | #endif  // DBFACE_BACKEND_HPP


--------------------------------------------------------------------------------
/src/infer/task_pool.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/src/infer/task_pool.hpp


--------------------------------------------------------------------------------
/src/infer/trt_backend.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "trt_backend.hpp"
 4 | #include <cc_util.hpp>
 5 | #include <common/trt_common.hpp>
 6 | 
 7 | #define cuCheck(op)	 Assert((op) == cudaSuccess)
 8 | 
 9 | namespace TRTInfer {
10 | 
11 | 	Backend::Backend(CUStream stream) {
12 | 
13 | 		this->stream_ = stream;
14 | 		this->ownStream_ = false;
15 | 
16 | 		if (stream == nullptr) {
17 | 			cuCheck(cudaStreamCreate(&stream_));
18 | 			ownStream_ = true;
19 | 		}
20 | 	}
21 | 
22 | 	void* Backend::getCPUMemory(size_t size) {
23 | 		if (cpuMemSize_ >= size)
24 | 			return cpuMemory_;
25 | 
26 | 		releaseCPUMemory();
27 | 		cpuMemSize_ = size;
28 | 		cpuMemory_ = malloc(size);
29 | 		Assert(cpuMemory_ != nullptr);
30 | 		return cpuMemory_;
31 | 	}
32 | 
33 | 	CUStream Backend::getStream() const {
34 | 		return stream_;
35 | 	}
36 | 
37 | 	void Backend::releaseCPUMemory() {
38 | 		if (cpuMemory_) {
39 | 			free(cpuMemory_);
40 | 			cpuMemory_ = nullptr;
41 | 			cpuMemSize_ = 0;
42 | 		}
43 | 	}
44 | 
45 | 	void* Backend::getGPUMemory(size_t size) {
46 | 		if (gpuMemSize_ >= size)
47 | 			return gpuMemory_;
48 | 
49 | 		releaseGPUMemory();
50 | 		gpuMemSize_ = size;
51 | 
52 | 		cuCheck(cudaMalloc(&gpuMemory_, gpuMemSize_));
53 | 		return gpuMemory_;
54 | 	}
55 | 
56 | 	void Backend::releaseGPUMemory() {
57 | 		if (gpuMemory_) {
58 | 			cudaFree(gpuMemory_);
59 | 			gpuMemory_ = nullptr;
60 | 			gpuMemSize_ = 0;
61 | 		}
62 | 	}
63 | 
64 | 	Backend::~Backend() {
65 | 		releaseGPUMemory();
66 | 		releaseCPUMemory();
67 | 
68 | 		if (ownStream_) {
69 | 			cudaStreamDestroy(stream_);
70 | 		}
71 | 		stream_ = nullptr;
72 | 	}
73 | };


--------------------------------------------------------------------------------
/src/infer/trt_backend.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TRT_BACKEND_HPP
 3 | #define TRT_BACKEND_HPP
 4 | 
 5 | #include <cc_util.hpp>
 6 | #include <opencv2/opencv.hpp>
 7 | #include <vector>
 8 | #include "trt_infer.hpp"
 9 | 
10 | namespace TRTInfer {
11 | 
12 | 	class Backend {
13 | 	public:
14 | 		Backend(CUStream stream = nullptr);
15 | 		virtual ~Backend();
16 | 
17 | 	protected:
18 | 		void* getCPUMemory(size_t size);
19 | 		void releaseCPUMemory();
20 | 
21 | 		void* getGPUMemory(size_t size);
22 | 		void releaseGPUMemory();
23 | 
24 | 		CUStream getStream() const;
25 | 
26 | 	private:
27 | 		void* cpuMemory_ = nullptr;
28 | 		size_t cpuMemSize_ = 0;
29 | 		void* gpuMemory_ = nullptr;
30 | 		size_t gpuMemSize_ = 0;
31 | 
32 | 		CUStream stream_ = nullptr;
33 | 		bool ownStream_ = false;
34 | 	};
35 | };
36 | 
37 | #endif  // TRT_BACKEND_HPP


--------------------------------------------------------------------------------
/src/infer/trt_infer.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #ifndef TRT_INFER_HPP
  4 | #define TRT_INFER_HPP
  5 | 
  6 | #include <string>
  7 | #include <memory>
  8 | #include <vector>
  9 | #include <map>
 10 | #include <opencv2/opencv.hpp>
 11 | 
 12 | class __half;
 13 | struct CUstream_st;
 14 | 
 15 | namespace TRTInfer {
 16 | 
 17 | 	typedef __half halfloat;
 18 | 	typedef CUstream_st* CUStream;
 19 | 
	// Which side (device or host) a tensor's data currently lives on.
	enum DataHead {
		DataHead_InGPU = 0,
		DataHead_InCPU = 1
	};
 24 | 
	// Element type of a tensor.
	enum class DataType : int {
		dtFloat = 0,
		dtHalfloat = 1
	};
 29 | 
 30 | 	int dataTypeSize(DataType dt);
 31 | 
 32 | 	class Tensor {
 33 | 	public:
 34 | 		Tensor();
 35 | 		Tensor(const Tensor& other);
 36 | 		Tensor(int n, int c, int h, int w, DataType dtType = DataType::dtFloat);
 37 | 		Tensor(int ndims, const int* dims, DataType dtType = DataType::dtFloat);
 38 | 		Tensor(const std::vector<int>& dims, DataType dtType = DataType::dtFloat);
 39 | 		Tensor& operator = (const Tensor& other);
 40 | 		virtual ~Tensor();
 41 | 
 42 | 		inline DataType type() const { return dtType_; }
 43 | 		inline std::vector<int> dims() const { return {num_, channel_, height_, width_}; }
 44 | 		inline cv::Size size() const { return cv::Size(width_, height_); }
 45 | 		inline int offset(int n = 0, int c = 0, int h = 0, int w = 0) { return ((n * this->channel_ + c) * this->height_ + h) * this->width_ + w; }
 46 | 		inline int num() const { return num_; }
 47 | 		inline int channel() const { return channel_; }
 48 | 		inline int height() const { return height_; }
 49 | 		inline int width() const { return width_; }
 50 | 		inline int bytes() const { return bytes_; }
 51 | 		inline int bytes(int start_axis) const { return count(start_axis) * elementSize(); }
 52 | 		inline int elementSize() const { return dataTypeSize(dtType_); }
 53 | 
 54 | 		void copyFrom(const Tensor& other);
 55 | 		void release();
 56 | 		void setTo(float value);
 57 | 		void setRandom(float low = 0, float high = 1);
 58 | 		bool empty();
 59 | 		void resize(int n, int c = -1, int h = -1, int w = -1);
 60 | 		void resize(int ndims, const int* dims);
 61 | 		void resize(const std::vector<int>& dims);
 62 | 		void reshape(int n, int c = -1, int h = -1, int w = -1);
 63 | 		void reshapeLike(const Tensor& other);
 64 | 		void resizeLike(const Tensor& other);
 65 | 		size_t count(int start_axis = 0) const;
 66 | 		void print();
 67 | 		const char* shapeString();
 68 | 
 69 | 		void toGPU(bool copyedIfCPU = true);
 70 | 		void toCPU(bool copyedIfGPU = true);
 71 | 		void toHalf();
 72 | 		void toFloat();
 73 | 
 74 | 		inline const void* cpu() const { ((Tensor*)this)->toCPU(); return host_; }
 75 | 		inline const void* gpu() const { ((Tensor*)this)->toGPU(); return device_; }
 76 | 		inline void* cpu() { toCPU(); return host_; }
 77 | 		inline void* gpu() { toGPU(); return device_; }
 78 | 		
 79 | 		template<typename DataT> inline const DataT* cpu() const { ((Tensor*)this)->toCPU(); return (const DataT*)host_; }
 80 | 		template<typename DataT> inline const DataT* gpu() const { ((Tensor*)this)->toGPU(); return (const DataT*)device_; }
 81 | 		template<typename DataT> inline DataT* cpu() { toCPU(); return (DataT*)host_; }
 82 | 		template<typename DataT> inline DataT* gpu() { toGPU(); return (DataT*)device_; }
 83 | 		template<typename DataT> inline DataT* cpu(int n, int c = 0, int h = 0, int w = 0) { return cpu<DataT>() + offset(n, c, h, w); }
 84 | 		template<typename DataT> inline DataT* gpu(int n, int c = 0, int h = 0, int w = 0) { return gpu<DataT>() + offset(n, c, h, w); }
 85 | 		template<typename DataT> inline float& at(int n = 0, int c = 0, int h = 0, int w = 0) { return *(cpu<DataT>() + offset(n, c, h, w)); }
 86 | 		inline cv::Mat atMat(int n = 0, int c = 0) { return cv::Mat(height_, width_, CV_32F, cpu<float>(n, c)); }
 87 | 
 88 | 		void setMat(int n, const cv::Mat& image);
 89 | 
 90 | 		void setNormMat(int n, const cv::Mat& image, float mean[3], float std[3]);
 91 | 		void setNormMatGPU(int n, const cv::Mat& image, float mean[3], float std[3]);
 92 | 
 93 | 		//result = (image - mean) * scale
 94 | 		void setMatMeanScale(int n, const cv::Mat& image, float mean[3], float scale = 1.0f);
 95 | 		Tensor transpose(int axis0, int axis1, int axis2, int axis3);
 96 | 		void transposeInplace(int axis0, int axis1, int axis2, int axis3);
 97 | 		void from(const void* ptr, int n, int c = 1, int h = 1, int w = 1, DataType dtType = DataType::dtFloat);
 98 | 
 99 | 		void* getTempGPUMemory(size_t size);
100 | 		void releaseTempGPUMemory();
101 | 
102 | 	private:
103 | 		int num_ = 0, channel_ = 0, height_ = 0, width_ = 0;
104 | 		void* device_ = nullptr;
105 | 		void* host_ = nullptr;
106 | 		size_t capacity_ = 0;
107 | 		size_t bytes_ = 0;
108 | 		DataHead head_ = DataHead_InCPU;
109 | 		DataType dtType_ = DataType::dtFloat;
110 | 		char shapeString_[100];
111 | 		void* tempGPUMemory_ = nullptr;
112 | 		size_t tempGPUMemoryLength = 0;
113 | 	};
114 | 
115 | 	class Engine {
116 | 	public:
117 | 		virtual bool load(const std::string& file) = 0;
118 | 		virtual void destroy() = 0;
119 | 		virtual void forward(bool sync = true) = 0;
120 | 		virtual int maxBatchSize() = 0;
121 | 		virtual CUStream getCUStream() = 0;
122 | 		virtual void synchronize() = 0;
123 | 
124 | 		virtual std::shared_ptr<Tensor> input(int index = 0) = 0;
125 | 		virtual std::shared_ptr<Tensor> output(int index = 0) = 0;
126 | 		virtual std::shared_ptr<Tensor> tensor(const std::string& name) = 0;
127 | 	};
128 | 
129 | 	void setDevice(int device_id);
130 | 	std::shared_ptr<Engine> loadEngine(const std::string& file);
131 | 	bool initNVPlugins();
132 | };	//TRTInfer
133 | 
134 | 
135 | #endif //TRT_INFER_HPP


--------------------------------------------------------------------------------
/src/infer/trt_infer_norm.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "trt_infer.hpp"
 4 | #include <common/trt_common.hpp>
 5 | 
 6 | namespace TRTInfer {
	// Converts interleaved 3-channel 8-bit pixels into three separate float planes, one thread per pixel:
	//   plane_c[p] = (src[3 * p + c] / 255 - mean_c) / std_c
	// `edge` is not referenced in this body; presumably KERNEL_POSITION uses it as the bounds check
	// for `position` -- TODO confirm against the macro definition.
	static __global__ void ImageNormMeanStd_forwardGPU_impl(float* d0, float* d1, float* d2, float m0, float m1, float m2, float std0, float std1, float std2, unsigned char* src, int edge) {

		KERNEL_POSITION;

		// The three channel values of this pixel sit next to each other in the source.
		unsigned char* pixel = src + position * 3;

		const unsigned char v0 = pixel[0];
		d0[position] = (v0 / 255.0f - m0) / std0;

		const unsigned char v1 = pixel[1];
		d1[position] = (v1 / 255.0f - m1) / std1;

		const unsigned char v2 = pixel[2];
		d2[position] = (v2 / 255.0f - m2) / std2;
	}
23 | 
24 | 	void ImageNormMeanStd_forwardGPU(float* d0, float* d1, float* d2, float mean[3], float std[3], unsigned char* src, int nump, cudaStream_t stream) {
25 | 
26 | 		auto grid = gridDims(nump);
27 | 		auto block = blockDims(nump);
28 | 
29 | 		if (stream) {
30 | 			ImageNormMeanStd_forwardGPU_impl << <grid, block, 0, stream >> > (d0, d1, d2, mean[0], mean[1], mean[2], std[0], std[1], std[2], src, nump);
31 | 		}
32 | 		else {
33 | 			ImageNormMeanStd_forwardGPU_impl << <grid, block >> > (d0, d1, d2, mean[0], mean[1], mean[2], std[0], std[1], std[2], src, nump);
34 | 		}
35 | 	}
36 | };


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | 
 3 | #include <cc_util.hpp>
 4 | #include "infer/trt_infer.hpp"
 5 | 
 6 | namespace examples {
 7 | 
 8 | 	// src/examples/*.cpp
 9 | 	void center_net_coco2x_dcn();
10 | 	void center_track_coco_tracking();
11 | 	void dbface();
12 | 	void onnx();
13 | };
14 | 
15 | int main() {
16 | 
17 | 	//save logs to file
18 | 	ccutil::setLoggerSaveDirectory("logs");
19 | 	TRTInfer::setDevice(0);
20 | 
21 | 	examples::onnx();
22 | 	examples::dbface();
23 | 	examples::center_net_coco2x_dcn();
24 | 	examples::center_track_coco_tracking();
25 | 	return 0;
26 | }


--------------------------------------------------------------------------------
/src/onnx/onnx_pb.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) ONNX Project Contributors.
 2 | // Licensed under the MIT license.
 3 | 
 4 | #ifndef ONNX_ONNX_PB_H
 5 | #define ONNX_ONNX_PB_H
 6 | 
 7 | // Defines ONNX_EXPORT and ONNX_IMPORT. On Windows, this corresponds to
 8 | // different declarations (dllexport and dllimport). On Linux/Mac, it just
 9 | // resolves to the same "default visibility" setting.
10 | #if defined(_MSC_VER)
11 | #if defined(ONNX_BUILD_SHARED_LIBS) || defined(ONNX_BUILD_MAIN_LIB)
12 | #define ONNX_EXPORT __declspec(dllexport)
13 | #define ONNX_IMPORT __declspec(dllimport)
14 | #else
15 | #define ONNX_EXPORT
16 | #define ONNX_IMPORT
17 | #endif
18 | #else
19 | #if defined(__GNUC__)
20 | #define ONNX_EXPORT __attribute__((__visibility__("default")))
21 | #else
22 | #define ONNX_EXPORT
23 | #endif
24 | #define ONNX_IMPORT ONNX_EXPORT
25 | #endif
26 | 
27 | // ONNX_API is a macro that, depends on whether you are building the
28 | // main ONNX library or not, resolves to either ONNX_EXPORT or
29 | // ONNX_IMPORT.
30 | //
31 | // This is used in e.g. ONNX's protobuf files: when building the main library,
32 | // it is defined as ONNX_EXPORT to fix a Windows global-variable-in-dll
33 | // issue, and for anyone dependent on ONNX it will be defined as
34 | // ONNX_IMPORT. ONNX_BUILD_MAIN_LIB can also be set when being built
35 | // statically if ONNX is being linked into a shared library that wants
36 | // to export the ONNX APIs and classes.
37 | //
38 | // More details on Windows dllimport / dllexport can be found at
39 | // https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
40 | //
41 | // This solution is similar to
42 | // https://github.com/pytorch/pytorch/blob/master/caffe2/core/common.h
43 | #define ONNX_API
44 | #include "onnx_ONNX_NAMESPACE-ml.pb.h"
45 | 
46 | #endif // ! ONNX_ONNX_PB_H
47 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ImporterContext.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | 
 25 | #include "onnx2trt.hpp"
 26 | 
 27 | #include <list>
 28 | #include <unordered_map>
 29 | 
 30 | namespace onnx2trt {
 31 | 
 32 | // Importer-side implementation of IImporterContext: bundles the network being
 33 | // built with the temp buffers, wrapped plugins, user tensors and opset table.
 34 | class ImporterContext final : public IImporterContext {
 35 |   nvinfer1::INetworkDefinition* _network;
 36 |   nvinfer1::ILogger* _logger;
 37 |   std::list<UniqueOwnable> _owned_plugin_instances;  // wrappers created by addPluginV2
 38 |   std::list<std::vector<uint8_t>> _temp_bufs;        // storage behind createTempWeights
 39 |   std::unordered_map<std::string, nvinfer1::ITensor*>  _user_inputs;
 40 |   std::unordered_map<std::string, nvinfer1::ITensor**> _user_outputs;
 41 |   std::unordered_map<std::string, int64_t> _opsets;  // domain -> opset version
 42 | public:
 43 |   ImporterContext(nvinfer1::INetworkDefinition* network,
 44 |                   nvinfer1::ILogger* logger)
 45 |     : _network(network), _logger(logger) {}
 46 |   virtual nvinfer1::INetworkDefinition* network() override { return _network; }
 47 |   nvinfer1::ILogger& logger() { return *_logger; }
 48 |   // Returns weights of the requested type/shape backed by a zero-initialised
 49 |   // byte buffer that this context keeps alive in _temp_bufs.
 50 |   virtual ShapedWeights createTempWeights(ShapedWeights::DataType type,
 51 |                                           nvinfer1::Dims shape) override {
 52 |     ShapedWeights weights(type, nullptr, shape);
 53 |     _temp_bufs.push_back(std::vector<uint8_t>(weights.size_bytes()));
 54 |     weights.values = _temp_bufs.back().data();
 55 |     return weights;
 56 |   }
 57 |   // Wraps `plugin` so it works with onnx2trt::PluginFactory, records the
 58 |   // wrapper in _owned_plugin_instances and adds it to the network. Returns
 59 |   // nullptr when NV_TENSORRT_MAJOR <= 4, where the network call is compiled out.
 60 |   virtual nvinfer1::IPluginV2Layer* addPluginV2(PluginV2* plugin,
 61 |                                             std::vector<nvinfer1::ITensor*> const& inputs) override {
 62 |     auto* wrapped_plugin = new TypeSerializingPlugin(plugin);
 63 |     _owned_plugin_instances.emplace_back(wrapped_plugin);
 64 | #if NV_TENSORRT_MAJOR > 4
 65 |     return _network->addPluginV2(inputs.data(), inputs.size(), *wrapped_plugin);
 66 | #else
 67 |     (void)inputs;
 68 |     return nullptr;  // previously control fell off the end here (undefined behaviour)
 69 | #endif
 70 |   }
 71 |   bool setUserInput(const char* name, nvinfer1::ITensor* input) { _user_inputs[name] = input; return true; }
 72 |   bool setUserOutput(const char* name, nvinfer1::ITensor** output) { _user_outputs[name] = output; return true; }
 73 |   // Both getters return nullptr when nothing was registered under `name`.
 74 |   nvinfer1::ITensor* getUserInput(const char* name) {
 75 |     auto const it = _user_inputs.find(name);
 76 |     return it == _user_inputs.end() ? nullptr : it->second;
 77 |   }
 78 |   nvinfer1::ITensor** getUserOutput(const char* name) {
 79 |     auto const it = _user_outputs.find(name);
 80 |     return it == _user_outputs.end() ? nullptr : it->second;
 81 |   }
 82 |   std::unordered_map<std::string, nvinfer1::ITensor**> const& getUserOutputs() const {
 83 |     return _user_outputs;
 84 |   }
 85 |   void clearOpsets() { _opsets.clear(); }
 86 |   // emplace() keeps the first version registered for a domain.
 87 |   void addOpset(std::string domain, int64_t version) {
 88 |     _opsets.emplace(domain, version);
 89 |   }
 90 |   // Opset lookup: 1 when no opsets are known; the only entry when exactly one
 91 |   // is known (whatever `domain` is); otherwise the entry for `domain`.
 92 |   virtual int64_t getOpsetVersion(const char* domain="") const override {
 93 |     if (_opsets.empty()) {
 94 |       return 1;
 95 |     }
 96 |     if (_opsets.size() == 1) {
 97 |       return _opsets.begin()->second;
 98 |     }
 99 |     assert(_opsets.count(domain));
100 |     return _opsets.at(domain);
101 |   }
102 | };
103 | 
104 | } // namespace onnx2trt
105 | 


--------------------------------------------------------------------------------
/src/onnx_parser/InstanceNormalization.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | #include <NvInfer.h>
 25 | 
 26 | #include "plugin.hpp"
 27 | #include "serialize.hpp"
 28 | #include <cudnn.h>
 29 | #include <vector>
 30 | 
 31 | using half_type = unsigned short;
 32 | // Name/version strings reported by the plugin and its creator in this header.
 33 | namespace {
 34 |     constexpr const char* INSTANCE_PLUGIN_VERSION = "001";
 35 |     constexpr const char* INSTANCE_PLUGIN_NAME = "InstanceNormalization";
 36 | }
 37 | 
 38 | // PluginV2 for ONNX InstanceNormalization; only epsilon, channel count and the host scale/bias copies are serialized.
 39 | class InstanceNormalizationPlugin final : public onnx2trt::PluginV2 {
 40 |   float _epsilon;
 41 |   int   _nchan;
 42 |   std::vector<float> _h_scale;
 43 |   std::vector<float> _h_bias;
 44 |   float* _d_scale;
 45 |   float* _d_bias;
 46 |   bool _initialized;
 47 |   nvinfer1::Weights _scale, _bias;  // not serialized; rebuilt by the deserializing constructor
 48 |   cudnnHandle_t _cudnn_handle;
 49 |   cudnnTensorDescriptor_t _x_desc, _y_desc, _b_desc;
 50 | protected:
 51 |   void deserialize(void const* serialData, size_t serialLength) {  // field order mirrors serialize()
 52 |     deserializeBase(serialData, serialLength);
 53 |     deserialize_value(&serialData, &serialLength, &_epsilon);
 54 |     deserialize_value(&serialData, &serialLength, &_nchan);
 55 |     deserialize_value(&serialData, &serialLength, &_h_scale);
 56 |     deserialize_value(&serialData, &serialLength, &_h_bias);
 57 |   }
 58 |   size_t getSerializationSize() const override {
 59 |     size_t const own_fields = serialized_size(_epsilon) + serialized_size(_nchan) +
 60 |                               serialized_size(_h_scale) + serialized_size(_h_bias);
 61 |     return own_fields + getBaseSerializationSize();
 62 |   }
 63 |   void serialize(void *buffer) const override {
 64 |     serializeBase(buffer);
 65 |     serialize_value(&buffer, _epsilon);
 66 |     serialize_value(&buffer, _nchan);
 67 |     serialize_value(&buffer, _h_scale);
 68 |     serialize_value(&buffer, _h_bias);
 69 |   }
 70 | public:
 71 |   InstanceNormalizationPlugin(float epsilon, nvinfer1::Weights const& scale,
 72 |                               nvinfer1::Weights const& bias);
 73 |   InstanceNormalizationPlugin(void const* serialData, size_t serialLength)
 74 |     : _d_scale(nullptr), _d_bias(nullptr), _initialized(false) {
 75 |     this->deserialize(serialData, serialLength);
 76 |     // _scale/_bias are not part of the serialized state. Point them at the
 77 |     // restored host vectors so clone() never copies indeterminate Weights.
 78 |     _scale = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, _h_scale.data(), static_cast<int64_t>(_h_scale.size())};
 79 |     _bias  = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, _h_bias.data(),  static_cast<int64_t>(_h_bias.size())};
 80 |   }
 81 |   const char* getPluginType() const override { return INSTANCE_PLUGIN_NAME; }
 82 |   virtual void destroy() override { delete this; }
 83 |   virtual nvinfer1::IPluginV2* clone() const override { return new InstanceNormalizationPlugin{_epsilon, _scale, _bias}; }
 84 |   virtual const char* getPluginVersion() const override { return INSTANCE_PLUGIN_VERSION; }
 85 |   // The plugin namespace is ignored: setting it is a no-op and "" is always reported.
 86 |   virtual void setPluginNamespace(const char* pluginNamespace) override {}
 87 |   virtual const char* getPluginNamespace() const override { return ""; }
 88 |   bool supportsFormat(nvinfer1::DataType type,
 89 |                       nvinfer1::PluginFormat format) const override;
 90 |   int getNbOutputs() const override { return 1; }
 91 |   // Single input, single output with identical dimensions.
 92 |   nvinfer1::Dims getOutputDimensions(int index,
 93 |                                      const nvinfer1::Dims *inputDims,
 94 |                                      int nbInputs) override {
 95 |     assert(index == 0);
 96 |     assert(inputDims);
 97 |     assert(nbInputs == 1);
 98 |     return *inputDims;
 99 |   }
100 |   int initialize() override;
101 |   void terminate() override;
102 |   int enqueue(int batchSize,
103 |               const void *const *inputs, void **outputs,
104 |               void *workspace, cudaStream_t stream) override;
105 |   size_t getWorkspaceSize(int maxBatchSize) const override;
106 |   ~InstanceNormalizationPlugin();
107 | };
108 | 
109 | // Creator registered with TensorRT: it can only rebuild the plugin from serialized data.
110 | class InstanceNormalizationPluginCreator : public nvinfer1::IPluginCreator
111 | {
112 | public:
113 |   InstanceNormalizationPluginCreator() {}
114 |   ~InstanceNormalizationPluginCreator() {}
115 |   const char* getPluginName() const { return INSTANCE_PLUGIN_NAME; }
116 |   const char* getPluginVersion() const { return INSTANCE_PLUGIN_VERSION; }
117 |   // Field-based creation is not implemented: both calls log to stderr and return nullptr.
118 |   const nvinfer1::PluginFieldCollection* getFieldNames() {
119 |     std::cerr<< "Function not implemented" << std::endl;
120 |     return nullptr;
121 |   }
122 |   nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) {
123 |     std::cerr<< "Function not implemented" << std::endl;
124 |     return nullptr;
125 |   }
126 |   nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { return new InstanceNormalizationPlugin(serialData, serialLength); }
127 |   void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; }
128 |   const char* getPluginNamespace() const { return mNamespace.c_str(); }
129 | private:
130 |   std::string mNamespace;
131 | };
132 | 
133 | REGISTER_TENSORRT_PLUGIN(InstanceNormalizationPluginCreator);
134 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ModelImporter.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include "NvOnnxParser.h"
26 | #include "ImporterContext.hpp"
27 | #include "common.hpp"
28 | #include "utils.hpp"
29 | #include "builtin_op_importers.hpp"
30 | 
31 | namespace onnx2trt {
32 | 
33 | // nvonnxparser::IParser implementation; keeps the parsed models (they own the weights) and the collected errors.
34 | class ModelImporter final : public nvonnxparser::IParser {
35 |   string_map<NodeImporter> _op_importers;
36 |   ImporterContext _importer_ctx;
37 |   std::list<::ONNX_NAMESPACE::ModelProto> _onnx_models; // Needed for ownership of weights
38 |   int _current_node;
39 |   std::vector<Status> _errors;
40 |   std::vector<nvinfer1::Dims> _inputDims;
41 |   Status importModel(::ONNX_NAMESPACE::ModelProto const &model,
42 |                      uint32_t weight_count,
43 |                      onnxTensorDescriptorV1 const *weight_descriptors);
44 |   NodeImportResult importNode(::ONNX_NAMESPACE::NodeProto const& node,
45 |                               std::vector<TensorOrWeights>& inputs,
46 |                               std::vector<std::string>& output_names);
47 | public:
48 |   ModelImporter(nvinfer1::INetworkDefinition* network,
49 |                 nvinfer1::ILogger* logger, const std::vector<nvinfer1::Dims>& dims)
50 |     : _op_importers(getBuiltinOpImporterMap()),
51 |       _importer_ctx(network, logger),
52 |       _current_node(-1), _inputDims(dims) {}  // -1: no node yet (assumed sentinel); was indeterminate before
53 |   bool parseWithWeightDescriptors(
54 |       void const *serialized_onnx_model, size_t serialized_onnx_model_size,
55 |       uint32_t weight_count,
56 |       onnxTensorDescriptorV1 const *weight_descriptors) override;
57 |   bool parse(void const *serialized_onnx_model,
58 |              size_t serialized_onnx_model_size) override;
59 |   bool parseFromFile(const char* onnxModelFile, int verbosity) override;
60 |   bool supportsModel(void const *serialized_onnx_model,
61 |                      size_t serialized_onnx_model_size,
62 |          SubGraphCollection_t &sub_graph_collection) override;
63 |   bool supportsOperator(const char* op_name) const override;
64 |   void destroy() override { delete this; }
65 |   //virtual void registerOpImporter(std::string op,
66 |   //                                NodeImporter const &node_importer) override {
67 |   //  // Note: This allows existing importers to be replaced
68 |   //  _op_importers[op] = node_importer;
69 |   //}
70 |   //virtual Status const &setInput(const char *name,
71 |   //                               nvinfer1::ITensor *input) override;
72 |   //virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override;
73 |   int getNbErrors() const override { return static_cast<int>(_errors.size()); }
74 |   nvonnxparser::IParserError const* getError(int index) const override {
75 |     assert(0 <= index && index < (int)_errors.size());  // range is checked in debug builds only
76 |     return &_errors[index];
77 |   }
78 |   void clearErrors() override { _errors.clear(); }
79 | };
80 | 
81 | } // namespace onnx2trt
82 | 


--------------------------------------------------------------------------------
/src/onnx_parser/NvOnnxParser.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #include "NvOnnxParser.h"
24 | #include "ModelImporter.hpp"
25 | 
26 | // Casts the opaque handles to their TensorRT types and returns a new ModelImporter; `version` is not used here.
27 | void* createNvOnnxParser_INTERNAL(void* network_, void* logger_, int version, std::vector<nvinfer1::Dims> inputDims) {
28 |   return new onnx2trt::ModelImporter(static_cast<nvinfer1::INetworkDefinition*>(network_),
29 |                                      static_cast<nvinfer1::ILogger*>(logger_), inputDims);
30 | }
31 | 
32 | // Reports the parser version: the value of NV_ONNX_PARSER_VERSION
33 | // at the time this translation unit was compiled.
34 | int getNvOnnxParserVersion() { return NV_ONNX_PARSER_VERSION; }
35 | 


--------------------------------------------------------------------------------
/src/onnx_parser/NvOnnxParserTypedefs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | #pragma once
23 | 
24 | #include <stddef.h>
25 | #include <vector>
26 | 
27 | using NodeProtoUniqueID    = std::size_t;
28 | using NodesContainer_t     = std::vector<NodeProtoUniqueID>;
29 | using SubGraph_t           = std::pair<NodesContainer_t, bool>;  // node ids plus a flag
30 | using SubGraphCollection_t = std::vector<SubGraph_t>;


--------------------------------------------------------------------------------
/src/onnx_parser/OnnxAttrs.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #include "OnnxAttrs.hpp"
24 | #include "ShapedWeights.hpp"
25 | #include "onnx2trt_utils.hpp"
26 | 
27 | // Float attribute `key`; at() throws std::out_of_range if the node lacks it.
28 | template<>
29 | float OnnxAttrs::get<float>(std::string key) const { return this->at(key)->f(); }
30 | // Integer attribute `key`, converted to int by the return type.
31 | template<>
32 | int OnnxAttrs::get<int>(std::string key) const { return this->at(key)->i(); }
33 | template<> bool OnnxAttrs::get<bool>(std::string key) const {
34 |   int const raw = this->at(key)->i();  // booleans are read from the integer attribute field
35 |   assert(raw == 0 || raw == 1);        // any other value is rejected (debug builds only)
36 |   return raw != 0;
37 | }
38 | // String attribute `key`, returned as a copy.
39 | template<>
40 | std::string OnnxAttrs::get<std::string>(std::string key) const { return this->at(key)->s(); }
41 | template<> std::vector<int> OnnxAttrs::get<std::vector<int>>(std::string key) const {
42 |   auto const& repeated = this->at(key)->ints();  // bound by reference, no copy of the field
43 |   return std::vector<int>(repeated.begin(), repeated.end());  // each element is converted to int
44 | }
45 | 
46 | template<> std::vector<int64_t> OnnxAttrs::get<std::vector<int64_t>>(std::string key) const {
47 |   auto const& repeated = this->at(key)->ints();  // bound by reference, no copy of the field
48 |   return std::vector<int64_t>(repeated.begin(), repeated.end());
49 | }
50 | 
51 | template<> std::vector<float> OnnxAttrs::get<std::vector<float>>(std::string key) const {
52 |   auto const& repeated = this->at(key)->floats();  // bound by reference, no copy of the field
53 |   return std::vector<float>(repeated.begin(), repeated.end());
54 | }
55 | template<> nvinfer1::Dims OnnxAttrs::get<nvinfer1::Dims>(std::string key) const {
56 |   auto const values = this->get<std::vector<int>>(key);  // must fit the fixed-size dims.d array
57 |   if( values.size() > size_t(nvinfer1::Dims::MAX_DIMS) ) throw std::out_of_range("Too many dimensions in attribute: " + key);
58 |   nvinfer1::Dims dims;
59 |   dims.nbDims = static_cast<int>(values.size());
60 |   std::copy(values.begin(), values.end(), dims.d);  // Note: No dimension type information is included
61 |   return dims;
62 | }
63 | template<> nvinfer1::DimsHW OnnxAttrs::get<nvinfer1::DimsHW>(std::string key) const {
64 |   nvinfer1::Dims const generic = this->get<nvinfer1::Dims>(key);  // read as plain Dims first
65 |   assert(generic.nbDims == 2);                                    // exactly two entries expected
66 |   return nvinfer1::DimsHW(generic.d[0], generic.d[1]);
67 | }
68 | template<> nvinfer1::Permutation OnnxAttrs::get<nvinfer1::Permutation>(std::string key) const {
69 |   auto const values = this->get<std::vector<int>>(key);
70 |   // perm.order has MAX_DIMS slots; copying a longer attribute would write past its end.
71 |   if( values.size() > size_t(nvinfer1::Dims::MAX_DIMS) ) throw std::out_of_range("Permutation attribute too long: " + key);
72 |   nvinfer1::Permutation perm;
73 |   std::copy(values.begin(), values.end(), perm.order);
74 |   // Fill unused values with identity permutation
75 |   for( int i=values.size(); i<nvinfer1::Dims::MAX_DIMS; ++i ) perm.order[i] = i;
76 |   return perm;
77 | }
78 | template<> onnx2trt::ShapedWeights OnnxAttrs::get<onnx2trt::ShapedWeights>(std::string key) const {
79 |   auto const& tensor_proto = this->at(key)->t();    // tensor payload of the attribute
80 |   onnx2trt::ShapedWeights converted;
81 |   convert_onnx_weights(tensor_proto, &converted);   // NOTE(review): any status it returns is ignored
82 |   return converted;
83 | }
84 | 


--------------------------------------------------------------------------------
/src/onnx_parser/OnnxAttrs.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include <onnx/onnx_pb.h>
26 | #include <NvInfer.h>
27 | #include <vector>
28 | #include <unordered_map>
29 | 
30 | // Name -> AttributeProto lookup for one ONNX node; stores pointers into `onnx_node`, so the node must outlive it.
31 | class OnnxAttrs {
32 |   template<typename T> using string_map = std::unordered_map<std::string, T>;
33 |   typedef string_map<::ONNX_NAMESPACE::AttributeProto const*> AttrMap;
34 |   AttrMap _attrs;
35 | public:
36 |   explicit OnnxAttrs(::ONNX_NAMESPACE::NodeProto const& onnx_node) {
37 |     for( auto const& attr : onnx_node.attribute() ) {
38 |       _attrs.emplace(attr.name(), &attr);  // a repeated name keeps its first entry
39 |     }
40 |   }
41 |   bool count(std::string key) const { return _attrs.find(key) != _attrs.end(); }
42 |   // Returns the attribute stored under `key`; throws std::out_of_range when there is none.
43 |   ::ONNX_NAMESPACE::AttributeProto const* at(std::string key) const {
44 |     auto const entry = _attrs.find(key);
45 |     if( entry == _attrs.end() ) throw std::out_of_range("Attribute not found: " + key);
46 |     return entry->second;
47 |   }
48 |   template<typename T> T get(std::string key) const;
49 |   template<typename T> T get(std::string key, T const& default_value) const {  // default when absent
50 |     return _attrs.count(key) ? this->get<T>(key) : default_value;
51 |   }
52 | };
53 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ResizeNearest.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include <cuda_fp16.h>
 24 | #include <cassert>
 25 | #include <algorithm>
 26 | #include "ResizeNearest.hpp"
 27 | 
 28 | // True iff `dims` is 3-D with types (channel, spatial, spatial). TODO: Move this to a common header
 29 | inline bool is_CHW(nvinfer1::Dims const& dims) {
 30 |   if( dims.nbDims != 3 ) return false;
 31 |   return dims.type[0] == nvinfer1::DimensionType::kCHANNEL &&
 32 |          dims.type[1] == nvinfer1::DimensionType::kSPATIAL &&
 33 |          dims.type[2] == nvinfer1::DimensionType::kSPATIAL;
 34 | }
 35 | 
 36 | // Output shape for the single CHW input: non-spatial axes are copied, and
 37 | // each spatial axis is multiplied by the next entry of _scale and truncated
 38 | // to int. All preconditions are checked with assert() only.
 39 | nvinfer1::Dims ResizeNearestPlugin::getOutputDimensions(int index,
 40 |                                                         const nvinfer1::Dims *inputDims,
 41 |                                                         int nbInputs) {
 42 |   assert(nbInputs == 1);
 43 |   nvinfer1::Dims const& input = inputDims[0];
 44 |   assert(is_CHW(input));
 45 |   assert(_ndims == 2);
 46 |   assert(index == 0);
 47 |   nvinfer1::Dims output;
 48 |   output.nbDims = input.nbDims;
 49 |   // _scale is consumed in axis order, one entry per spatial axis.
 50 |   for( int axis=0, next_scale=0; axis<input.nbDims; ++axis ) {
 51 |     bool const spatial = (input.type[axis] == nvinfer1::DimensionType::kSPATIAL);
 52 |     output.type[axis] = input.type[axis];
 53 |     output.d[axis] = spatial ? int(input.d[axis] * _scale[next_scale++]) : input.d[axis];
 54 |   }
 55 |   return output;
 56 | }
 57 | 
 58 | int ResizeNearestPlugin::initialize() {
 59 |   // Compute the output shape once and keep it in _output_dims for enqueue().
 60 |   auto const& in_dims = this->getInputDims(0);
 61 |   _output_dims = this->getOutputDimensions(0, &in_dims, 1);
 62 |   assert(is_CHW(in_dims) && is_CHW(_output_dims));
 63 |   assert(_ndims == 2); return 0;
 64 | }
 65 | 
 66 | // Nearest-neighbour 2-D resize: output (ox, oy) copies input (int(ox/scale.x), int(oy/scale.y)) in each of `nbatch` planes.
 67 | template <typename Data>
 68 | __global__
 69 | void resize_nearest_kernel_2d(int nbatch,
 70 |                               float2 scale,
 71 |                               int2 osize,
 72 |                               Data const* idata, int istride, int ibatchstride,
 73 |                               Data*       odata, int ostride, int obatchstride) {
 74 |   int const x_step = blockDim.x * gridDim.x;
 75 |   int const y_step = blockDim.y * gridDim.y;
 76 |   for( int plane=blockIdx.z; plane<nbatch; plane+=gridDim.z ) {
 77 |     Data const* src = idata + plane * ibatchstride;
 78 |     Data*       dst = odata + plane * obatchstride;
 79 |     for( int oy=threadIdx.y + blockIdx.y * blockDim.y; oy<osize.y; oy+=y_step ) {
 80 |       int const iy = int(oy / scale.y);
 81 |       for( int ox=threadIdx.x + blockIdx.x * blockDim.x; ox<osize.x; ox+=x_step ) {
 82 |         dst[oy * ostride + ox] = src[iy * istride + int(ox / scale.x)];
 83 |       }
 84 |     }
 85 |   }
 86 | }
 87 | 
 88 | int ResizeNearestPlugin::enqueue(int batchSize,
 89 |                                  const void *const *inputs, void **outputs,
 90 |                                  void *workspace, cudaStream_t stream) {
 91 |   auto const& input_dims = this->getInputDims(0);
 92 |   if( _ndims != 2 ) {
 93 |     return -1;  // only the 2-D case is implemented
 94 |   }
 95 |   // The kernel takes (x, y) pairs, so index 1 / d[2] comes first and index 0 / d[1] second.
 96 |   float2 const scale = {_scale[1], _scale[0]};
 97 |   int2 const osize = {_output_dims.d[2], _output_dims.d[1]};
 98 |   int const istride =   input_dims.d[2];
 99 |   int const ostride = _output_dims.d[2];
100 |   int const ibatchstride =   input_dims.d[1] * istride;
101 |   int const obatchstride = _output_dims.d[1] * ostride;
102 |   int const nplanes = batchSize * input_dims.d[0];  // one 2-D plane per (batch item, d[0] index)
103 |   dim3 const block(32, 16);
104 |   // Enough blocks to cover the output in x/y; grid.z is capped at 65535 and the kernel loops over remaining planes.
105 |   dim3 const grid((osize.x - 1) / block.x + 1,
106 |                   (osize.y - 1) / block.y + 1,
107 |                   std::min(nplanes, 65535));
108 |   if (getDataType()==nvinfer1::DataType::kFLOAT) {
109 |     resize_nearest_kernel_2d<<<grid, block, 0, stream>>>
110 |       (nplanes, scale, osize,
111 |        static_cast<float const*>( inputs[0]), istride, ibatchstride,
112 |        static_cast<float*      >(outputs[0]), ostride, obatchstride);
113 |   } else {  // every other data type is handled as __half
114 |     resize_nearest_kernel_2d<<<grid, block, 0, stream>>>
115 |       (nplanes, scale, osize,
116 |        static_cast<__half const*>( inputs[0]), istride, ibatchstride,
117 |        static_cast<__half*      >(outputs[0]), ostride, obatchstride);
118 |   }
119 |   return cudaGetLastError() != cudaSuccess;
120 | }
121 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ResizeNearest.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | #include <NvInfer.h>
 25 | 
 26 | #include "plugin.hpp"
 27 | #include "serialize.hpp"
 28 | 
 29 | #include <cassert>
 30 | 
 31 | namespace {  // name/version strings shared by the plugin and its creator in this header
 32 |     constexpr const char* RESIZE_PLUGIN_VERSION = "001";
 33 |     constexpr const char* RESIZE_PLUGIN_NAME = "ResizeNearest";
 34 | }
 35 | 
 36 | // PluginV2 that resizes a CHW tensor by nearest-neighbour sampling; only _ndims and _scale are serialized.
 37 | class ResizeNearestPlugin final : public onnx2trt::PluginV2 {
 38 |   int   _ndims;
 39 |   float _scale[nvinfer1::Dims::MAX_DIMS];
 40 |   nvinfer1::Dims _output_dims;
 41 | protected:
 42 |   // Field order here must mirror serialize().
 43 |   void deserialize(void const* serialData, size_t serialLength) {
 44 |     deserializeBase(serialData, serialLength);
 45 |     deserialize_value(&serialData, &serialLength, &_ndims);
 46 |     deserialize_value(&serialData, &serialLength, &_scale);
 47 |   }
 48 |   size_t getSerializationSize() const override {
 49 |     return serialized_size(_ndims) + serialized_size(_scale) + getBaseSerializationSize();
 50 |   }
 51 |   void serialize(void *buffer) const override {
 52 |     serializeBase(buffer);
 53 |     serialize_value(&buffer, _ndims);
 54 |     serialize_value(&buffer, _scale);
 55 |   }
 56 | public:
 57 |   // One scale factor per resized dimension (at most Dims::MAX_DIMS of them).
 58 |   ResizeNearestPlugin(std::vector<float> const& scale)
 59 |     : _ndims(scale.size()), _scale() {  // zero-fill: serialize() is handed the whole array
 60 |     assert(scale.size() <= nvinfer1::Dims::MAX_DIMS);
 61 |     std::copy(scale.begin(), scale.end(), _scale);
 62 |   }
 63 |   ResizeNearestPlugin(void const* serialData, size_t serialLength) {
 64 |     this->deserialize(serialData, serialLength);
 65 |   }
 66 |   virtual const char* getPluginType() const override { return RESIZE_PLUGIN_NAME; }
 67 |   virtual void destroy() override { delete this; }
 68 |   // The clone is rebuilt from the first _ndims scale factors only.
 69 |   virtual nvinfer1::IPluginV2* clone() const override { return new ResizeNearestPlugin{std::vector<float>(_scale, _scale + _ndims)}; }
 70 |   virtual const char* getPluginVersion() const override { return RESIZE_PLUGIN_VERSION; }
 71 |   // The plugin namespace is ignored: setting it is a no-op and "" is always reported.
 72 |   virtual void setPluginNamespace(const char* pluginNamespace) override {}
 73 |   virtual const char* getPluginNamespace() const override { return ""; }
 74 |   virtual int getNbOutputs() const override { return 1; }
 75 |   // Declared here, defined out of line.
 76 |   virtual nvinfer1::Dims getOutputDimensions(int index,
 77 |                                              const nvinfer1::Dims *inputs, int nbInputDims) override;
 78 |   virtual int initialize() override;
 79 |   int enqueue(int batchSize,
 80 |               const void *const *inputs, void **outputs,
 81 |               void *workspace, cudaStream_t stream) override;
 82 | };
 83 | 
 84 | class ResizeNearestPluginCreator : public nvinfer1::IPluginCreator
 85 | {
 86 | public:
 87 |   ResizeNearestPluginCreator() {}
 88 | 
 89 |   ~ResizeNearestPluginCreator() {}
 90 | 
 91 |   const char* getPluginName() const { return RESIZE_PLUGIN_NAME; }
 92 | 
 93 |   const char* getPluginVersion() const { return RESIZE_PLUGIN_VERSION; }
 94 | 
 95 |   const nvinfer1::PluginFieldCollection* getFieldNames() { std::cerr<< "Function not implemented" << std::endl; return nullptr; }
 96 | 
 97 |   nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) { std::cerr<< "Function not implemented" << std::endl; return nullptr; }
 98 | 
 99 |   nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { return new ResizeNearestPlugin{serialData, serialLength}; }
100 | 
101 |   void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; }
102 | 
103 |   const char* getPluginNamespace() const { return mNamespace.c_str(); }
104 | private:
105 |     std::string mNamespace;
106 | };
107 | 
108 | REGISTER_TENSORRT_PLUGIN(ResizeNearestPluginCreator);
109 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ShapedWeights.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include "ShapedWeights.hpp"
 24 | #include "trt_utils.hpp"
 25 | #include "onnx2trt_utils.hpp"
 26 | 
 27 | namespace onnx2trt {
 28 | 
 29 | bool convertINT64(void* weightValues, const size_t nbWeights, std::vector<int32_t>& converted_weights)
 30 | {
 31 |   int64_t * weights = static_cast<int64_t *>(weightValues);
 32 |   for (size_t i = 0; i < nbWeights; i++)
 33 |   {
 34 |     if (weights[i] > INT32_MAX || weights[i] < INT32_MIN)
 35 |     {
 36 |       return false;
 37 |     }
 38 |     else
 39 |     {
 40 |       converted_weights[i] = static_cast<int32_t>(weights[i]);
 41 |     }
 42 |   }
 43 |   return true;
 44 | }
 45 | 
 46 | size_t ShapedWeights::count() const {
 47 |   if( this->values == nullptr && this->shape.nbDims == 0 )
 48 |   {
 49 |     return 0;
 50 |   }
 51 |   else 
 52 |   {
 53 |     // TRT supports scalars, so 0D tensors should have a count of 1.
 54 |     size_t c = 1;
 55 |     for( int i=0; i<this->shape.nbDims; ++i )
 56 |     {
 57 |       c *= this->shape.d[i];
 58 |     }
 59 |     return c;
 60 |   }
 61 | }
 62 | 
 63 | ShapedWeights ShapedWeights::empty(DataType type) {
 64 |   return ShapedWeights(type, nullptr, nvinfer1::Dims{0});
 65 | }
 66 | 
 67 | ShapedWeights::ShapedWeights() : values(nullptr), shape{0} {}
 68 | 
 69 | ShapedWeights::ShapedWeights(DataType type_, void* values_, nvinfer1::Dims shape_)
 70 |   : type(type_), values(values_), shape(shape_) {
 71 |   // Note: this->shape.type[] is not used
 72 | }
 73 | 
 74 | size_t ShapedWeights::size_bytes() const {
 75 |   return this->count() * get_dtype_size(this->type);
 76 | }
 77 | 
 78 | ShapedWeights::operator bool() const {
 79 | return (bool)this->values;
 80 | }
 81 | 
 82 | ShapedWeights::operator nvinfer1::Weights() const {
 83 |   nvinfer1::Weights w {};
 84 |   // If INT64 weights, check if all the values can be cast down to INT32.
 85 |   if (this->type == ::ONNX_NAMESPACE::TensorProto::INT64)
 86 |   {
 87 |     cout << "WARNING: Your ONNX model has been generated with INT64 weights, "
 88 |          << "while TensorRT does not natively support INT64. "
 89 |          << "Attempting to cast down to INT32." << endl;
 90 |     std::vector<int32_t> int32_weights;
 91 |     int32_weights.resize(this->count());
 92 | 
 93 |     if (!onnx2trt::convertINT64(this->values, this->count(), int32_weights))
 94 |     {
 95 |       cerr << "ERROR: Weights cannot be cast down to INT32." << endl;
 96 |       // Return empty w on failure
 97 |       return w;
 98 |     }
 99 |     else
100 |     {
101 |       void * int32_weights_ptr = static_cast<void *>(int32_weights.data());
102 |       std::memcpy(this->values, int32_weights_ptr, int32_weights.size() * sizeof(int32_t));
103 |       w.values = this->values;
104 |       cout << "Successfully casted down to INT32." << endl;
105 |     }
106 |   }
107 |   else
108 |   {
109 |     w.values = this->values;
110 |   }
111 |   bool supported_type = convert_dtype(this->type, &w.type);
112 |   (void)supported_type;
113 |   assert(supported_type);
114 |   w.count = this->count();
115 |   return w;
116 | }
117 | 
118 | template<typename DType>
119 | void transpose2DWeights(ShapedWeights const& weights,
120 |                         nvinfer1::Dims const& new_shape,
121 |                         ShapedWeights* result) {
122 |   DType const* src = reinterpret_cast<DType*>(weights.values);
123 |   DType*       dst = reinterpret_cast<DType*>(result->values);
124 |   int src_stride = weights.shape.d[1];
125 |   int dst_stride = result->shape.d[1];
126 |   for (int i = 0; i < new_shape.d[0]; ++i) {
127 |     for (int j = 0; j < new_shape.d[1]; ++j) {
128 |       dst[i * dst_stride + j] = src[j * src_stride + i];
129 |     }
130 |   }
131 | }
132 |   
133 | bool transposeWeights(ShapedWeights const& weights,
134 |                       nvinfer1::Permutation const& perm,
135 |                       ShapedWeights* result) {
136 |   nvinfer1::Dims shape = weights.shape;
137 |   nvinfer1::Dims new_shape;
138 |   new_shape.nbDims = shape.nbDims;
139 |   for( int d=0; d<shape.nbDims; ++d ) {
140 |     new_shape.d[d] = shape.d[perm.order[d]];
141 |     result->shape.d[d] = new_shape.d[d];
142 |   }
143 |   // TODO: Need to generalize this transpose implementation
144 |   assert(perm.order[0] == 1 && perm.order[1] == 0);
145 |   if (shape.nbDims == 2) {
146 |     if (weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT) {
147 |       transpose2DWeights<float>(weights, new_shape, result);
148 |     } else if (weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16) {
149 |       transpose2DWeights<uint16_t>(weights, new_shape, result);
150 |     } else {
151 |       return false;
152 |     }
153 |   } else {
154 |     // TODO: Implement general transposes and multiple data types
155 |     // Unsupported weights transpose
156 |     return false;
157 |   }
158 |   return true;
159 | }
160 | 
161 | } // namespace onnx2trt
162 | 


--------------------------------------------------------------------------------
/src/onnx_parser/ShapedWeights.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include <onnx/onnx_pb.h>
26 | #include <NvInfer.h>
27 | 
28 | namespace onnx2trt {
29 | 
30 | class ShapedWeights {
31 | public:
32 |   typedef int32_t DataType;
33 |   DataType type;
34 |   void* values;
35 |   nvinfer1::Dims shape;
36 |   static ShapedWeights empty(DataType type);
37 |   ShapedWeights();
38 |   explicit ShapedWeights(DataType type, void* values, nvinfer1::Dims shape_);
39 |   size_t count() const;
40 |   size_t size_bytes() const;
41 |   operator bool() const;
42 |   operator nvinfer1::Weights() const;
43 | };
44 | 
45 | bool transposeWeights(ShapedWeights const& weights,
46 |                       nvinfer1::Permutation const& perm,
47 |                       ShapedWeights* result);
48 | 
49 | bool convertINT64(void * weightValues, const size_t nbWeights, std::vector<int32_t>& converted_weights);
50 | 
51 | } // namespace onnx2trt
52 | 


--------------------------------------------------------------------------------
/src/onnx_parser/Split.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include <algorithm>
 24 | #include <cuda_fp16.h>
 25 | #include <cassert>
 26 | 
 27 | #include "Split.hpp"
 28 | 
 29 | nvinfer1::Dims SplitPlugin::getOutputDimensions(int index,
 30 |                                                 const nvinfer1::Dims *inputDims,
 31 |                                                 int nbInputs) {
 32 |   assert(nbInputs == 1);
 33 |   assert(index < this->getNbOutputs());
 34 |   nvinfer1::Dims const& input_dims = inputDims[0];
 35 |   nvinfer1::Dims output_dims = input_dims;
 36 |   output_dims.d[_axis] = _output_lengths.at(index);
 37 |   return output_dims;
 38 | }
 39 | 
 40 | int SplitPlugin::initialize() {
 41 |   std::vector<int> segment_offsets(1, 0);
 42 |   for( int i=0; i<this->getNbOutputs(); ++i ) {
 43 |     segment_offsets.push_back(segment_offsets.back() + _output_lengths[i]);
 44 |   }
 45 |   _d_segment_offsets = segment_offsets;
 46 |   nvinfer1::Dims dims = this->getInputDims(0);
 47 |   _nx = 1;
 48 |   for( int i=dims.nbDims-1; i>_axis; --i ) {
 49 |     _nx *= dims.d[i];
 50 |   }
 51 |   _ny = dims.d[_axis];
 52 |   _nz = 1;
 53 |   for( int i=_axis-1; i>=0; --i ) {
 54 |     _nz *= dims.d[i];
 55 |   }
 56 |   _d_output_ptrs.resize(this->getNbOutputs(), nullptr);
 57 |   return 0;
 58 | }
 59 | 
 60 | template<typename T>
 61 | __device__
 62 | int upper_bound(T const* vals, int n, T const& key) {
 63 |   int i = 0;
 64 |   while( n > 0 ) {
 65 |     int m = n / 2;
 66 |     int j = i + m;
 67 |     if( !(key < vals[j]) ) {
 68 |       i  = j + 1;
 69 |       n -= m + 1;
 70 |     } else {
 71 |       n = m;
 72 |     }
 73 |   }
 74 |   return i;
 75 | }
 76 | 
 77 | template<typename T>
 78 | __global__
 79 | void split_kernel(int nsegment,
 80 |                   int const* __restrict__ segment_offsets,
 81 |                   T   const* __restrict__ idata,
 82 |                   T*  const* odatas,
 83 |                   int nx,
 84 |                   int src_ny,
 85 |                   int nz) {
 86 |   int x0     = threadIdx.x + blockIdx.x * blockDim.x;
 87 |   int src_y0 = threadIdx.y + blockIdx.y * blockDim.y;
 88 |   int z0     = threadIdx.z + blockIdx.z * blockDim.z;
 89 |   for( int z=z0; z<nz; z+=blockDim.z*gridDim.z ) {
 90 |     for( int src_y=src_y0; src_y<src_ny; src_y+=blockDim.y*gridDim.y ) {
 91 |       for( int x=x0; x<nx; x+=blockDim.x*gridDim.x ) {
 92 |   int segment = upper_bound(segment_offsets, nsegment, src_y) - 1;
 93 |   int dst_y = src_y - segment_offsets[segment];
 94 |   int dst_ny = segment_offsets[segment + 1] - segment_offsets[segment];
 95 |   odatas[segment][x + nx*(dst_y + dst_ny*z)] =
 96 |             idata[x + nx*(src_y + src_ny*z)];
 97 |       }
 98 |     }
 99 |   }
100 | }
101 | 
102 | int SplitPlugin::enqueue(int batchSize,
103 |                          const void *const *inputs, void **outputs,
104 |                          void *workspace, cudaStream_t stream) {
105 |   auto const& input_dims = this->getInputDims(0);
106 |   int const* d_segment_offsets_ptr =
107 |     thrust::raw_pointer_cast(&_d_segment_offsets[0]);
108 |   float  const* idata    = reinterpret_cast<float  const*>(inputs[0]);
109 |   float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
110 |   float** odatas = thrust::raw_pointer_cast(&_d_output_ptrs[0]);
111 |   cudaError_t cuda_status =
112 |     cudaMemcpyAsync(odatas, h_odatas,
113 |                     _d_output_ptrs.size() * sizeof(float*),
114 |                     cudaMemcpyHostToDevice, stream);
115 |   if( cuda_status != cudaSuccess ) {
116 |     return 1;
117 |   }
118 |   int nz = _nz * batchSize;
119 |   dim3 block(32, 16);
120 |   dim3 grid(std::min((_nx - 1) / block.x + 1, 65535u),
121 |             std::min((_ny - 1) / block.y + 1, 65535u),
122 |             std::min((_nz - 1) / block.z + 1, 65535u));
123 |   if (getDataType()==nvinfer1::DataType::kFLOAT) {      
124 |     split_kernel<<<grid, block, 0, stream>>>
125 |       (_d_segment_offsets.size(), d_segment_offsets_ptr, idata, odatas,
126 |        _nx, _ny, nz);
127 |   } else {
128 |     split_kernel<<<grid, block, 0, stream>>>
129 |       (_d_segment_offsets.size(), d_segment_offsets_ptr, (__half const*)idata, (__half**)odatas,
130 |        _nx, _ny, nz);
131 |   }
132 |   return cudaGetLastError() != cudaSuccess;
133 | }
134 | 


--------------------------------------------------------------------------------
/src/onnx_parser/Split.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | #include <NvInfer.h>
 25 | 
 26 | #include "plugin.hpp"
 27 | #include "serialize.hpp"
 28 | 
 29 | #include <thrust/device_vector.h>
 30 | #include <cassert>
 31 | 
 32 | namespace {
 33 |     constexpr const char* SPLIT_PLUGIN_VERSION{"001"};
 34 |     constexpr const char* SPLIT_PLUGIN_NAME{"Split"};
 35 | }
 36 | 
 37 | class SplitPlugin final : public onnx2trt::PluginV2 {
 38 |   int _axis;
 39 |   std::vector<int> _output_lengths;
 40 |   int _nx, _ny, _nz;
 41 |   int _x_stride, _y_stride, _z_stride;
 42 |   thrust::device_vector<int> _d_segment_offsets;
 43 |   thrust::device_vector<float*> _d_output_ptrs;
 44 | protected:
 45 |   void deserialize(void const* serialData, size_t serialLength) {
 46 |     deserializeBase(serialData, serialLength);
 47 |     deserialize_value(&serialData, &serialLength, &_axis);
 48 |     deserialize_value(&serialData, &serialLength, &_output_lengths);
 49 |   }
 50 |   virtual size_t getSerializationSize() const override {
 51 |     return serialized_size(_axis) + serialized_size(_output_lengths)
 52 |       + getBaseSerializationSize();
 53 |   }
 54 |   virtual void serialize(void *buffer) const override {
 55 |     serializeBase(buffer);
 56 |     serialize_value(&buffer, _axis);
 57 |     serialize_value(&buffer, _output_lengths);
 58 |   }
 59 | public:
 60 |   SplitPlugin(int axis, std::vector<int> const& output_lengths)
 61 |     : _axis(axis), _output_lengths(output_lengths) {
 62 |     assert(axis <= nvinfer1::Dims::MAX_DIMS);
 63 |   }
 64 |   SplitPlugin(void const* serialData, size_t serialLength) {
 65 |     this->deserialize(serialData, serialLength);
 66 |   }
 67 |   virtual const char* getPluginType() const override { return SPLIT_PLUGIN_NAME; }
 68 | 
 69 |   virtual void destroy() override { delete this; }
 70 | 
 71 |   virtual nvinfer1::IPluginV2* clone() const override { return new SplitPlugin{_axis, _output_lengths}; }
 72 | 
 73 |   virtual const char* getPluginVersion() const override { return SPLIT_PLUGIN_VERSION; }
 74 | 
 75 |   virtual void setPluginNamespace(const char* pluginNamespace) override {}
 76 | 
 77 |   virtual const char* getPluginNamespace() const override { return ""; }
 78 | 
 79 |   virtual int getNbOutputs() const override { return _output_lengths.size(); }
 80 |   virtual nvinfer1::Dims getOutputDimensions(int index,
 81 |                                              const nvinfer1::Dims *inputs, int nbInputDims) override;
 82 |   virtual int initialize() override;
 83 |   virtual int enqueue(int batchSize,
 84 |                       const void *const *inputs, void **outputs,
 85 |                       void *workspace, cudaStream_t stream) override;
 86 | };
 87 | 
 88 | class SplitPluginCreator : public nvinfer1::IPluginCreator
 89 | {
 90 | public:
 91 |   SplitPluginCreator() {}
 92 | 
 93 |   ~SplitPluginCreator() {}
 94 | 
 95 |   const char* getPluginName() const { return SPLIT_PLUGIN_NAME; }
 96 | 
 97 |   const char* getPluginVersion() const { return SPLIT_PLUGIN_VERSION; }
 98 | 
 99 |   const nvinfer1::PluginFieldCollection* getFieldNames() { std::cerr<< "Function not implemented" << std::endl; return nullptr; }
100 | 
101 |   nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) { std::cerr<< "Function not implemented" << std::endl; return nullptr; }
102 | 
103 |   nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) { return new SplitPlugin{serialData, serialLength}; }
104 | 
105 |   void setPluginNamespace(const char* libNamespace) { mNamespace = libNamespace; }
106 | 
107 |   const char* getPluginNamespace() const { return mNamespace.c_str(); }
108 | private:
109 |     std::string mNamespace;
110 | };
111 | 
112 | REGISTER_TENSORRT_PLUGIN(SplitPluginCreator);
113 | 


--------------------------------------------------------------------------------
/src/onnx_parser/Status.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | 
#include "NvOnnxParser.h"

#include <cassert>
#include <string>
#include <utility>
 29 | 
 30 | #define MAKE_ERROR(desc, code) \
 31 |   Status((code), (desc), __FILE__, __LINE__, __func__)
 32 | 
 33 | #define ASSERT(condition, error_code) do { \
 34 |     if( !(condition) ) { \
 35 |       return MAKE_ERROR("Assertion failed: " #condition, (error_code)); \
 36 |     } \
 37 |   } while(0)
 38 | 
 39 | #define MAKE_INPUT_ERROR(desc, code, name) \
 40 |   Status((code), (desc), name, __LINE__, __func__)
 41 | 
 42 | #define ASSERT_INPUT(condition, error_code, name) do {\
 43 |     if ( !(condition) ) {\
 44 |       return MAKE_INPUT_ERROR("Assertion failed: " #condition, (error_code), (name)); \
 45 |     } \
 46 |   } while(0)
 47 | 
 48 | #define ASSERT_C(condition, error_code) do { \
 49 |     if( !(condition) ) { \
 50 |       return error_code; \
 51 |     } \
 52 |   } while(0)
 53 | 
 54 | #define GET_VALUE(value_or_error_, result_ptr) do { \
 55 |     auto const& value_or_error = value_or_error_; \
 56 |     if( value_or_error.is_error() ) { \
 57 |       return value_or_error.error(); \
 58 |     } else { \
 59 |       *result_ptr = value_or_error.value(); \
 60 |     } \
 61 |   } while(0)
 62 | 
 63 | #define TRT_CHECK(call) do { \
 64 |     Status status = call; \
 65 |     if( !status.is_success() ) { \
 66 |       return status; \
 67 |     } \
 68 |   } while(0)
 69 | 
 70 | namespace onnx2trt {
 71 | 
 72 | using nvonnxparser::ErrorCode;
 73 | 
 74 | class Status : public nvonnxparser::IParserError {
 75 |   ErrorCode   _code;
 76 |   std::string _desc;
 77 |   std::string _file;
 78 |   int         _line;
 79 |   std::string _func;
 80 |   int         _node;
 81 | public:
 82 |   static Status success() { return Status(ErrorCode::kSUCCESS); }
 83 |   Status() {}
 84 |   explicit Status(ErrorCode code, std::string desc="",
 85 |                   std::string file="", int line=0, std::string func="",
 86 |                   int node=-1)
 87 |     : _code(code), _desc(desc), _file(file), _line(line), _func(func),
 88 |       _node(node) {}
 89 |   ErrorCode   code() const override { return _code; }
 90 |   const char* desc() const override { return _desc.c_str(); }
 91 |   const char* file() const override { return _file.c_str(); }
 92 |   int         line() const override { return _line; }
 93 |   const char* func() const override { return _func.c_str(); }
 94 |   int         node() const override { return _node; }
 95 |   bool is_error()   const { return _code != ErrorCode::kSUCCESS; }
 96 |   bool is_success() const { return _code == ErrorCode::kSUCCESS; }
 97 |   void setNode(int node) { _node = node; }
 98 | };
 99 | 
100 | template<typename T>
101 | class ValueOrStatus {
102 |   bool   _is_error;
103 |   T      _value;
104 |   Status _error;
105 | public:
106 |   ValueOrStatus(T const& value) : _is_error(false), _value(value), _error(Status::success()) {}
107 |   ValueOrStatus(T&& value)      : _is_error(false), _value(value), _error(Status::success()) {}
108 |   ValueOrStatus(Status const& error) : _is_error(true), _error(error) {}
109 |   ValueOrStatus(Status&& error)      : _is_error(true), _error(error) {}
110 |   bool is_error() const { return _is_error; }
111 |   T     const& value() const { assert(!_is_error); return _value; }
112 |   T&           value()       { assert(!_is_error); return _value; }
113 |   Status const& error() const { assert( _is_error); return _error; }
114 | };
115 | 
116 | } // namespace onnx2trt
117 | 


--------------------------------------------------------------------------------
/src/onnx_parser/TensorOrWeights.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include "ShapedWeights.hpp"
26 | 
27 | #include <NvInfer.h>
28 | #include <cassert>
29 | 
30 | namespace onnx2trt {
31 | 
32 | class TensorOrWeights {
33 |   union {
34 |     nvinfer1::ITensor* _tensor;
35 |     ShapedWeights      _weights;
36 |   };
37 |   enum { NODE_TENSOR, NODE_WEIGHTS } _variant;
38 | public:
39 |   inline TensorOrWeights() : _tensor(nullptr), _variant(NODE_TENSOR) {}
40 |   inline TensorOrWeights(nvinfer1::ITensor* tensor)
41 |     : _tensor(tensor), _variant(NODE_TENSOR) {}
42 |   inline TensorOrWeights(ShapedWeights const& weights)
43 |     : _weights(weights), _variant(NODE_WEIGHTS) {}
44 |   inline bool is_tensor()  const { return _variant == NODE_TENSOR; }
45 |   inline bool is_weights() const { return _variant == NODE_WEIGHTS; }
46 |   inline nvinfer1::ITensor& tensor() {
47 |     assert(this->is_tensor());
48 |     return *_tensor;
49 |   }
50 |   inline nvinfer1::ITensor const& tensor() const {
51 |     assert(this->is_tensor());
52 |     return *_tensor;
53 |   }
54 |   inline ShapedWeights& weights() {
55 |     assert(this->is_weights());
56 |     return _weights;
57 |   }
58 |   inline ShapedWeights const& weights() const {
59 |     assert(this->is_weights());
60 |     return _weights;
61 |   }
62 |   inline nvinfer1::Dims shape() const {
63 |     return this->is_tensor() ? _tensor->getDimensions() : _weights.shape;
64 |   }
65 |   inline operator bool() const {
66 |     return this->is_tensor() ? (bool)_tensor : (bool)_weights;
67 |   }
68 |   nvinfer1::ITensor* reset_tensor(nvinfer1::ITensor* tensor) {
69 |     assert(this->is_tensor());
70 |     return _tensor = tensor;
71 |   }
72 | };
73 | 
74 | } // namespace onnx2trt
75 | 


--------------------------------------------------------------------------------
/src/onnx_parser/builtin_op_importers.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include "onnx2trt.hpp"
26 | #include "utils.hpp"
27 | 
28 | namespace onnx2trt {
29 | 
30 | string_map<NodeImporter>& getBuiltinOpImporterMap();
31 | 
32 | } // namespace onnx2trt
33 | 


--------------------------------------------------------------------------------
/src/onnx_parser/common.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | #pragma once
 23 | 
 24 | #include <onnx/onnx_pb.h>
 25 | #include <memory>
 26 | #include <fstream>
 27 | #include <iostream>
 28 | #include <ctime>
 29 | #include <fcntl.h> // For ::open
 30 | #include <limits>
 31 | #include <google/protobuf/io/coded_stream.h>
 32 | #include <google/protobuf/io/zero_copy_stream_impl.h>
 33 | #include <google/protobuf/text_format.h>
 34 | 
 35 | #ifdef _WIN32
 36 | #include <io.h>
 37 | #endif
 38 | 
 39 | // Namespace for common functions used throughout onnx-trt
 40 | namespace common
 41 | {
 42 |   // Deleter functor: releases an object through its destroy() method.
 43 |   struct InferDeleter {
 44 |       template<typename T>
 45 |       void operator()(T* obj) const {
 46 |           // A null pointer is silently ignored.
 47 |           if( obj != nullptr ) { obj->destroy(); }
 48 |       }
 49 |   };
 50 | 
 51 |   // Takes ownership of obj in a shared_ptr released by InferDeleter; throws std::runtime_error if obj is null.
 52 |   template<typename T> inline std::shared_ptr<T> infer_object(T* obj) {
 53 |       if( obj == nullptr ) {
 54 |           throw std::runtime_error("Failed to create object");
 55 |       }
 56 |       return std::shared_ptr<T>(obj, InferDeleter{});
 57 |   }
 58 | 
 59 |   // Logger for TensorRT info/warning/errors
 60 |   class TRT_Logger : public nvinfer1::ILogger {
 61 |     nvinfer1::ILogger::Severity _verbosity; // messages are printed only when severity <= _verbosity
 62 |     std::ostream* _ostream;                 // destination stream; stored as a pointer, not owned
 63 |   public:
 64 |     TRT_Logger(Severity verbosity=Severity::kWARNING,
 65 |                std::ostream& ostream=std::cout)
 66 |       : _verbosity(verbosity), _ostream(&ostream) {}
 67 |     void log(Severity severity, const char* msg) override {
 68 |       if( severity > _verbosity ) { return; } // filtered out by the configured verbosity
 69 |       // Timestamp the message in UTC.
 70 |       char stamp[256];
 71 |       const time_t now = std::time(0);
 72 |       strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", std::gmtime(&now));
 73 |       const char* label = "UNKNOWN";
 74 |       if(      severity == Severity::kINTERNAL_ERROR ) { label = "    BUG"; }
 75 |       else if( severity == Severity::kERROR )          { label = "  ERROR"; }
 76 |       else if( severity == Severity::kWARNING )        { label = "WARNING"; }
 77 |       else if( severity == Severity::kINFO )           { label = "   INFO"; }
 78 |       // Written as "[<timestamp> <label>] <message>", then flushed by std::endl.
 79 |       (*_ostream) << "[" << stamp
 80 |                   << " " << label << "] "
 81 |                   << msg
 82 |                   << std::endl;
 83 |     }
 84 |   };
 85 | 
 86 |   // Parses a binary protobuf file into msg; returns false if the file cannot be opened or parsed.
 87 |   inline bool ParseFromFile_WAR(google::protobuf::Message* msg, const char* filename) {
 88 |     int fd = ::open(filename, O_RDONLY); // NOTE(review): no O_BINARY flag here; confirm behaviour on Windows
 89 |     if( fd < 0 ) { return false; } // open failed: never hand an invalid descriptor to protobuf
 90 |     google::protobuf::io::FileInputStream raw_input(fd);
 91 |     raw_input.SetCloseOnDelete(true); // raw_input closes fd when it is destroyed
 92 |     google::protobuf::io::CodedInputStream coded_input(&raw_input);
 93 |     // Note: This WARs the very low default size limit (64MB)
 94 |     coded_input.SetTotalBytesLimit(std::numeric_limits<int>::max(), std::numeric_limits<int>::max()/4);
 95 |     return msg->ParseFromCodedStream(&coded_input);
 96 |   }
 97 | 
 98 |   inline bool ParseFromTextFile(google::protobuf::Message* msg, const char* filename) { // text-format protobuf file -> msg
 99 |     int fd = ::open(filename, O_RDONLY);
100 |     if( fd < 0 ) { return false; } // open failed: report it instead of parsing an invalid descriptor
101 |     google::protobuf::io::FileInputStream raw_input(fd);
102 |     raw_input.SetCloseOnDelete(true); // raw_input closes fd when it is destroyed
103 |     return google::protobuf::TextFormat::Parse(&raw_input, msg); // false when the text does not parse
104 |   }
105 | 
106 |   // Formats a packed IR version (major*1000000 + minor*10000 + patch) as "major.minor.patch".
107 |   inline std::string onnx_ir_version_string(int64_t ir_version=::ONNX_NAMESPACE::IR_VERSION) {
108 |     const int major_part = ir_version / 1000000;
109 |     const int minor_part = ir_version % 1000000 / 10000;
110 |     const int patch_part = ir_version % 10000;
111 |     return std::to_string(major_part) + "." + std::to_string(minor_part) +
112 |            "." + std::to_string(patch_part);
113 |   }
114 | 
115 |   // Prints to std::cout the ONNX IR and TensorRT versions this code was compiled against.
116 |   inline void print_version() {
117 |     std::cout << "Parser built against:" << std::endl;
118 |     const std::string ir_text = onnx_ir_version_string(::ONNX_NAMESPACE::IR_VERSION);
119 |     std::cout << "  ONNX IR version:  " << ir_text << std::endl;
120 |     std::cout << "  TensorRT version: " << NV_TENSORRT_MAJOR << "."
121 |               << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << std::endl;
122 |   }
123 | } // namespace common


--------------------------------------------------------------------------------
/src/onnx_parser/onnx2trt.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include "NvOnnxParser.h"
26 | #include "ShapedWeights.hpp"
27 | #include "TensorOrWeights.hpp"
28 | #include "Status.hpp"
29 | #include "plugin.hpp"
30 | 
31 | #include <onnx/onnx_pb.h>
32 | #include <NvInfer.h>
33 | #include <vector>
34 | #include <functional>
35 | 
36 | namespace onnx2trt {
37 | 
38 | class IImporterContext;
39 | 
40 | // TODO: Find ABI-safe alternative approach for this:
41 | //         Can't use std::vector
42 | //         Can't use ::onnx::NodeProto
43 | //         Can't use std::function
44 | using NodeImportResult = ValueOrStatus<std::vector<TensorOrWeights>>;
45 | // Callable that imports one ONNX node, given the importer context, the node and its inputs.
46 | using NodeImporter = std::function<NodeImportResult(IImporterContext *ctx,
47 |                                                     ::ONNX_NAMESPACE::NodeProto const &node,
48 |                                                     std::vector<TensorOrWeights> &inputs)>;
49 | 
50 | struct IImporterContext { // abstract interface; NodeImporter callbacks receive it as their ctx argument
51 |   virtual nvinfer1::INetworkDefinition* network() = 0; // NOTE(review): presumably the network being populated — confirm
52 |   virtual ShapedWeights createTempWeights(ShapedWeights::DataType type,
53 |                                           nvinfer1::Dims shape) = 0; // NOTE(review): lifetime of the returned weights is not visible here — confirm
54 |   virtual nvinfer1::IPluginV2Layer* addPluginV2(
55 |       PluginV2* plugin,
56 |       std::vector<nvinfer1::ITensor*> const& inputs) = 0;
57 |   virtual int64_t getOpsetVersion(const char* domain="") const = 0; // domain defaults to the empty string
58 | protected:
59 |   virtual ~IImporterContext() {} // protected: outside code cannot delete through this interface
60 | };
61 | 
62 | } // namespace onnx2trt
63 | 


--------------------------------------------------------------------------------
/src/onnx_parser/onnx2trt_common.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include <NvInfer.h>
26 | #include <memory>
27 | 
28 | # if NV_TENSORRT_MAJOR < 4
29 | namespace nvinfer1 {
30 | 
31 |   enum class PluginFormat: uint8_t
32 |   {
33 |       kNCHW        = 0, //!< NCHW
34 |       kNC2HW2      = 1, //!< NCHW with 2-element packed channels
35 |       kNHWC8       = 2  //!< NHWC with 8-element packed channels (C
36 |                         //!must be a multiple of 8)
37 |   };
38 |   // from NvInfer.h
39 |   class IPluginExt : public IPlugin // stand-in compiled only when NV_TENSORRT_MAJOR < 4
40 |   {
41 |   public:
42 |     virtual int getTensorRTVersion() const
43 |     {
44 |         return NV_TENSORRT_VERSION;
45 |     }
46 |     virtual bool supportsFormat(DataType type, PluginFormat format) const = 0;
47 |     virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0;
48 | 
49 |   protected:
50 |     void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) final { // routes configure() to configureWithFormat
51 |       DataType     type   = nvinfer1::DataType::kFLOAT;   // fixed type used for the forwarded call
52 |       PluginFormat format = nvinfer1::PluginFormat::kNCHW; // fixed format used for the forwarded call
53 |       return this->configureWithFormat(
54 |           inputDims, nbInputs, outputDims, nbOutputs, type, format, maxBatchSize);
55 |     }
56 |     virtual ~IPluginExt() {}
57 |   };
58 | 
59 | } // namespace nvinfer1
60 | #endif
61 | 
62 | namespace onnx2trt {
63 | 
64 | struct IOwnable { // interface for objects that are released through destroy()
65 |   virtual void destroy() = 0;
66 | protected:
67 |   virtual ~IOwnable() {} // protected: outside code cannot delete through this interface
68 | };
69 | 
70 | // Deleter that releases an IOwnable through destroy(); it is the deleter type of UniqueOwnable.
71 | struct OwnableDeleter {
72 |   // obj is dereferenced without a null check.
73 |   void operator()(IOwnable* obj) const { obj->destroy(); }
74 | };
75 | 
76 | using UniqueOwnable = std::unique_ptr<IOwnable, OwnableDeleter>;
77 | class Plugin;
78 | 
79 | } // namespace onnx2trt
80 | 


--------------------------------------------------------------------------------
/src/onnx_parser/onnx2trt_utils.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #include "onnx2trt_utils.hpp"
24 | 
25 | namespace onnx2trt {
26 | 
27 | // Fills trtAttr->d[0..nbSpatialDims) from onnxAttr's ints, using defaultVal for axes the attribute does not list.
28 | void setAttr(nvinfer1::Dims * trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int nbSpatialDims, int defaultVal){
29 |   assert(trtAttr->nbDims == nbSpatialDims);
30 |   const int provided = onnxAttr->ints().size();
31 |   int axis = 0;
32 |   while( axis < nbSpatialDims ){
33 |     // Axes past the end of the attribute fall back to defaultVal.
34 |     trtAttr->d[axis] = (axis < provided) ? onnxAttr->ints(axis) : defaultVal;
35 |     ++axis;
36 |   }
37 | }
38 | 
39 | // Reads kernel_shape, strides, dilations, pads and auto_pad from onnx_node into the TensorRT outputs.
40 | // The Dims outputs are written only when the matching attribute is present; paddingMode is always set.
41 | void get_kernel_params(::ONNX_NAMESPACE::NodeProto const& onnx_node,
42 |                        nvinfer1::Dims* kernel_size, nvinfer1::Dims* strides,
43 |                        nvinfer1::Dims* beg_padding, nvinfer1::Dims* end_padding,
44 |                        nvinfer1::PaddingMode& paddingMode,
45 |                        nvinfer1::Dims* dilations) {
46 |   const int nbSpatialDims = kernel_size->nbDims;
47 |   OnnxAttrs attrs(onnx_node);
48 |   // Per-axis attributes: setAttr fills axes the attribute does not cover with 1.
49 |   if( attrs.count("kernel_shape") ) {
50 |     setAttr(kernel_size, attrs.at("kernel_shape"), nbSpatialDims, 1);
51 |   }
52 |   if( attrs.count("strides") ) {
53 |     setAttr(strides, attrs.at("strides"), nbSpatialDims, 1);
54 |   }
55 |   if( dilations && attrs.count("dilations") ) { // dilations may be passed as null
56 |     setAttr(dilations, attrs.at("dilations"), nbSpatialDims, 1);
57 |   }
58 |   // Explicit padding is the default; only a SAME_* auto_pad changes the mode below.
59 |   paddingMode = nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN;
60 |   const auto onnx_auto_pad = attrs.get("auto_pad", std::string("NOTSET"));
61 |   const bool uses_explicit_pads = (onnx_auto_pad == "VALID" || onnx_auto_pad == "NOTSET");
62 |   if( !uses_explicit_pads ) { // SAME_* padding
63 |     // "pads" is not expected alongside a SAME_* auto_pad.
64 |     assert(!attrs.count("pads"));
65 |     // Note: ONNX is always NCHW ordering
66 |     if( onnx_auto_pad == "SAME_LOWER" ) {
67 |       paddingMode = nvinfer1::PaddingMode::kSAME_LOWER;
68 |     } else if( onnx_auto_pad == "SAME_UPPER" ) {
69 |       paddingMode = nvinfer1::PaddingMode::kSAME_UPPER;
70 |     } else {
71 |       throw std::invalid_argument("Unexpected auto_pad value: " + onnx_auto_pad);
72 |     }
73 |     return;
74 |   }
75 |   // VALID / NOTSET: take the explicit pads when the node provides them.
76 |   if( !attrs.count("pads") ) {
77 |     return;
78 |   }
79 |   // "pads" holds every begin value first, followed by every end value.
80 |   const auto onnx_padding = attrs.get<std::vector<int>>("pads");
81 |   const int ndim = onnx_padding.size() / 2;
82 |   for(int i = 0; i < nbSpatialDims; ++i){
83 |     // Axes beyond those listed in "pads" get zero padding.
84 |     const bool listed = (i < ndim);
85 |     beg_padding->d[i] = listed ? onnx_padding.at(i) : 0;
86 |     end_padding->d[i] = listed ? onnx_padding.at(i + ndim) : 0;
87 |   }
88 | }
89 | 
90 | } // namespace onnx2trt
91 | 


--------------------------------------------------------------------------------
/src/onnx_parser/onnx_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #include <onnx/onnx_pb.h>
24 | #include <google/protobuf/text_format.h>
25 | #include <sstream>
26 | 
27 | namespace {
28 | 
29 | // Replaces raw_data payloads longer than 128 characters with "..." in the text form of an ONNX model.
30 | inline void remove_raw_data_strings(std::string& s) {
31 |   const std::string key = "raw_data: \"";
32 |   const std::string sub = "...";
33 |   std::string::size_type beg = 0;
34 |   while( (beg = s.find(key, beg)) != std::string::npos ) {
35 |     beg += key.length();
36 |     // Find the closing quote; a backslash escapes the next character, so "\\" is skipped as a pair.
37 |     std::string::size_type end = beg;
38 |     while( end < s.size() && s[end] != '"' ) { end += (s[end] == '\\') ? 2 : 1; }
39 |     if( end >= s.size() ) { break; } // unterminated string: leave the remainder untouched
40 |     const bool is_large = (end - beg > 128); // Only remove large data strings
41 |     if( is_large ) { s.replace(beg, end - beg, sub); }
42 |     beg = (is_large ? beg + sub.length() : end) + 1; // resume just past the closing quote
43 |   }
44 | }
45 | 
46 | // Removes float_data, int32_data etc. from the text representation of an ONNX model
47 | inline std::string remove_repeated_data_strings(std::string& s) {
48 |   std::istringstream input(s);
49 |   std::ostringstream output;
50 |   bool in_run = false; // true while the previous line was a *_data line
51 |   std::string line;
52 |   while( std::getline(input, line) ) {
53 |     const bool is_data_line = line.find("float_data:") != std::string::npos ||
54 |                               line.find("int32_data:") != std::string::npos ||
55 |                               line.find("int64_data:") != std::string::npos;
56 |     if( !is_data_line ) {
57 |       output << line << "\n";
58 |     } else if( !in_run ) {
59 |       // Only the first line of a run is kept, with everything after its first ':' replaced by " ...".
60 |       output << line.substr(0, line.find(":") + 1) << " ...\n";
61 |     }
62 |     in_run = is_data_line;
63 |   }
64 |   return output.str();
65 | }
66 | 
67 | } // anonymous namespace
68 | 
69 | // Renders message in protobuf text format, then abbreviates raw_data and repeated *_data entries.
70 | inline std::string pretty_print_onnx_to_string(::google::protobuf::Message const& message) {
71 |   std::string text;
72 |   ::google::protobuf::TextFormat::PrintToString(message, &text);
73 |   remove_raw_data_strings(text);
74 |   return remove_repeated_data_strings(text);
75 | }
76 | 
77 | // Streams the abbreviated text form of an ONNX model produced by pretty_print_onnx_to_string.
78 | inline std::ostream& operator<<(std::ostream& stream, ::ONNX_NAMESPACE::ModelProto const& message) {
79 |   return stream << pretty_print_onnx_to_string(message);
80 | }
81 | 
82 | // Streams the abbreviated text form of an ONNX node produced by pretty_print_onnx_to_string.
83 | inline std::ostream& operator<<(std::ostream& stream, ::ONNX_NAMESPACE::NodeProto const& message) {
84 |   return stream << pretty_print_onnx_to_string(message);
85 | }
86 | 


--------------------------------------------------------------------------------
/src/onnx_parser/plugin.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include "plugin.hpp"
 24 | #include "serialize.hpp"
 25 | #include <NvInferPlugin.h>
 26 | 
 27 | namespace onnx2trt {
 28 | 
 29 | // ========================= Plugin =====================
 30 | 
 31 |   void Plugin::serializeBase(void*& buffer) { // writes the base-class fields into buffer and advances it past them
 32 |     serialize_value(&buffer, _input_dims);
 33 |     serialize_value(&buffer, _max_batch_size);
 34 |     serialize_value(&buffer, _data_type);
 35 |     serialize_value(&buffer, _data_format); // field order matches what deserializeBase reads
 36 |   }
 37 | 
 38 |   void Plugin::deserializeBase(void const*& serialData, size_t& serialLength) { // restores the base-class fields; advances serialData and shrinks serialLength
 39 |     deserialize_value(&serialData, &serialLength, &_input_dims);
 40 |     deserialize_value(&serialData, &serialLength, &_max_batch_size);
 41 |     deserialize_value(&serialData, &serialLength, &_data_type);
 42 |     deserialize_value(&serialData, &serialLength, &_data_format); // field order matches what serializeBase writes
 43 |   }
 44 | 
 45 |   size_t Plugin::getBaseSerializationSize() {
 46 |     // Sums the serialized sizes of the same four fields that serializeBase writes.
 47 |     size_t total = serialized_size(_input_dims) + serialized_size(_max_batch_size);
 48 |     total += serialized_size(_data_type) + serialized_size(_data_format);
 49 |     return total;
 50 |   }
 51 | 
 52 |   // Accepts kFLOAT or kHALF data, and only in the kNCHW plugin format.
 53 |   bool Plugin::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const {
 54 |     if( format != nvinfer1::PluginFormat::kNCHW ) { return false; }
 55 |     return type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF;
 56 |   }
 57 | 
 58 |   // Stores the input dims, max batch size, data type and format; outputDims and nbOutputs are not used.
 59 |   void Plugin::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs,
 60 |                                    const nvinfer1::Dims* outputDims, int nbOutputs,
 61 |                                    nvinfer1::DataType type, nvinfer1::PluginFormat format,
 62 |                                    int maxBatchSize)  {
 63 |     _input_dims.assign(inputDims, inputDims + nbInputs); // copies nbInputs entries
 64 |     _max_batch_size = maxBatchSize;
 65 |     _data_type = type;
 66 |     _data_format = format;
 67 |   }
 68 | 
 69 | // ========================= PluginV2 =====================
 70 | 
 71 |   void PluginV2::serializeBase(void*& buffer) const { // writes the base-class fields into buffer and advances it past them
 72 |     serialize_value(&buffer, _input_dims);
 73 |     serialize_value(&buffer, _max_batch_size);
 74 |     serialize_value(&buffer, _data_type);
 75 |     serialize_value(&buffer, _data_format); // field order matches what deserializeBase reads
 76 |   }
 77 | 
 78 |   void PluginV2::deserializeBase(void const*& serialData, size_t& serialLength) { // restores the base-class fields; advances serialData and shrinks serialLength
 79 |     deserialize_value(&serialData, &serialLength, &_input_dims);
 80 |     deserialize_value(&serialData, &serialLength, &_max_batch_size);
 81 |     deserialize_value(&serialData, &serialLength, &_data_type);
 82 |     deserialize_value(&serialData, &serialLength, &_data_format); // field order matches what serializeBase writes
 83 |   }
 84 | 
 85 |   size_t PluginV2::getBaseSerializationSize() const {
 86 |     // Sums the serialized sizes of the same four fields that serializeBase writes.
 87 |     size_t total = serialized_size(_input_dims) + serialized_size(_max_batch_size);
 88 |     total += serialized_size(_data_type) + serialized_size(_data_format);
 89 |     return total;
 90 |   }
 91 | 
 92 |   // Accepts kFLOAT or kHALF data, and only in the kNCHW plugin format.
 93 |   bool PluginV2::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const {
 94 |     if( format != nvinfer1::PluginFormat::kNCHW ) { return false; }
 95 |     return type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF;
 96 |   }
 97 | 
 98 |   // Stores the input dims, max batch size, data type and format; outputDims and nbOutputs are not used.
 99 |   void PluginV2::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs,
100 |                                      const nvinfer1::Dims* outputDims, int nbOutputs,
101 |                                      nvinfer1::DataType type, nvinfer1::PluginFormat format,
102 |                                      int maxBatchSize)  {
103 |     _input_dims.assign(inputDims, inputDims + nbInputs); // copies nbInputs entries
104 |     _max_batch_size = maxBatchSize;
105 |     _data_type = type;
106 |     _data_format = format;
107 |   }
108 | 
109 | // ========================= PluginAdapter =====================
110 | 
111 |   int PluginAdapter::getNbOutputs() const { // forwards to the wrapped _pluginV2
112 |     return _pluginV2->getNbOutputs();
113 |   }
114 |   nvinfer1::Dims PluginAdapter::getOutputDimensions(int index,
115 |                                                     const nvinfer1::Dims *inputDims,
116 |                                                     int nbInputs)  { // forwards all arguments to the wrapped _pluginV2
117 |     return _pluginV2->getOutputDimensions(index, inputDims, nbInputs);
118 |   }
119 |   void PluginAdapter::serialize(void* buffer) const {
120 |     _pluginV2->serialize(buffer); // delegated unchanged to the wrapped plugin
121 |   }
122 |   size_t PluginAdapter::getSerializationSize() const { // forwards to the wrapped _pluginV2
123 |     return _pluginV2->getSerializationSize();
124 |   }
125 |   bool PluginAdapter::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const
126 |   { // the answer comes from the wrapped _pluginV2
127 |     return _pluginV2->supportsFormat(type, format);
128 |   }
129 |   // Passes every argument straight through to the wrapped plugin's configureWithFormat.
130 |   void PluginAdapter::configureWithFormat(const nvinfer1::Dims *inputDims, int nbInputs,
131 |                                           const nvinfer1::Dims *outputDims, int nbOutputs,
132 |                                           nvinfer1::DataType type,
133 |                                           nvinfer1::PluginFormat format,
134 |                                           int maxBatchSize) {
135 |     _pluginV2->configureWithFormat(inputDims, nbInputs, outputDims, nbOutputs,
136 |                                    type, format, maxBatchSize);
137 |   }
138 |   size_t PluginAdapter::getWorkspaceSize(int maxBatchSize) const { // forwards to the wrapped _pluginV2
139 |     return _pluginV2->getWorkspaceSize(maxBatchSize);
140 |   }
141 |   int PluginAdapter::initialize() { return _pluginV2->initialize(); } // returns the wrapped plugin's result unchanged
142 | 
143 |   void PluginAdapter::terminate() {
144 |     // This is the only forwarder in this file that checks _pluginV2 before using it.
145 |     if (!_pluginV2) { return; }
146 |     _pluginV2->terminate();
147 |   }
148 | 
149 |   void PluginAdapter::destroy() {
150 |     _pluginV2->destroy(); // only the wrapped plugin's destroy() is invoked here
151 |   }
152 | 
153 |   nvinfer1::IPluginV2* PluginAdapter::clone() const { // returns the wrapped plugin's clone() result, not a new PluginAdapter
154 |     return _pluginV2->clone();
155 |   }
156 | 
157 |   const char* PluginAdapter::getPluginVersion() const { // forwards to the wrapped _pluginV2
158 |     return _pluginV2->getPluginVersion();
159 |   }
160 | 
161 |   void PluginAdapter::setPluginNamespace(const char* pluginNamespace) {
162 |     _pluginV2->setPluginNamespace(pluginNamespace); // the namespace is kept by the wrapped plugin
163 |   }
164 | 
165 |   const char* PluginAdapter::getPluginNamespace() const { // forwards to the wrapped _pluginV2
166 |       return _pluginV2->getPluginNamespace();
167 |   }
168 | 
169 |   int PluginAdapter::enqueue(int batchSize,
170 |                              const void *const *inputs, void **outputs,
171 |                              void *workspace, cudaStream_t stream) { // forwards all arguments to the wrapped _pluginV2
172 |     return _pluginV2->enqueue(batchSize, inputs, outputs, workspace, stream);
173 |   }
174 | 
175 | } // namespace onnx2trt
176 | 


--------------------------------------------------------------------------------
/src/onnx_parser/plugin_common.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | #pragma once
23 | 
24 | namespace onnx2trt {
25 |   static const char REGISTERABLE_PLUGIN_MAGIC_STRING[] = "Pv01"; // 4 characters plus the terminating NUL; NOTE(review): presumably a tag for serialized registerable plugins — confirm against its users
26 | } // namespace onnx2trt
27 | 
28 | 


--------------------------------------------------------------------------------
/src/onnx_parser/serialize.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #pragma once
 24 | 
 25 | #include <cstring>
 26 | #include <vector>
 27 | #include <cassert>
 28 | #include <type_traits>
 29 | 
 30 | #include <iostream>
 31 | using std::cout;
 32 | using std::cerr;
 33 | using std::endl;
 34 | 
 35 | template<typename T>
 36 | inline void serialize_value(void** buffer, T const& value);
 37 | 
 38 | template<typename T>
 39 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value);
 40 | 
 41 | namespace {
 42 | 
 43 | template<typename T, class Enable=void>
 44 | struct Serializer {};
 45 | 
 46 | // Fixed-size values (arithmetic, enum or POD types) are stored as their raw sizeof(T) bytes.
 47 | template<typename T>
 48 | struct Serializer<T, typename std::enable_if<
 49 |                        std::is_arithmetic<T>::value ||
 50 |                        std::is_enum<T>::value ||
 51 |                        std::is_pod<T>::value>::type> {
 52 |   static size_t serialized_size(T const& value) { return sizeof(T); }
 53 |   // serialize/deserialize copy the bytes and move *buffer past them; deserialize also shrinks *buffer_size.
 54 |   static void serialize(void** buffer, T const& value) {
 55 |     ::memcpy(*buffer, &value, sizeof(T));
 56 |     *buffer = static_cast<char*>(*buffer) + sizeof(T);
 57 |   }
 58 |   static void deserialize(void const** buffer, size_t* buffer_size, T* value) {
 59 |     assert(*buffer_size >= sizeof(T));
 60 |     ::memcpy(value, *buffer, sizeof(T));
 61 |     *buffer_size -= sizeof(T);
 62 |     *buffer = static_cast<char const*>(*buffer) + sizeof(T);
 63 |   }
 64 | };
 65 | 
 66 | template<>
 67 | struct Serializer<const char*> { // C strings are stored including their terminating NUL
 68 |   static size_t serialized_size(const char* value) { return strlen(value) + 1; }
 69 |   static void serialize(void** buffer, const char* value) {
 70 |     char* dest = static_cast<char*>(*buffer);
 71 |     ::strcpy(dest, value);
 72 |     *buffer = dest + strlen(value) + 1;
 73 |   }
 74 |   static void deserialize(void const** buffer, size_t* buffer_size, const char** value) {
 75 |     char const* text = static_cast<char const*>(*buffer);
 76 |     *value = text; // points into the buffer itself; no copy is made
 77 |     const size_t data_size = strnlen(text, *buffer_size) + 1;
 78 |     assert(*buffer_size >= data_size);
 79 |     *buffer = text + data_size;
 80 |     *buffer_size -= data_size;
 81 |   }
 82 | };
 83 | 
 84 | // Vectors of arithmetic/enum/POD elements: a size_t element count followed by the raw element bytes.
 85 | template<typename T>
 86 | struct Serializer<std::vector<T>, typename std::enable_if<
 87 |     std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_pod<T>::value>::type> {
 88 |   static size_t serialized_size(std::vector<T> const& value) { return sizeof(value.size()) + value.size() * sizeof(T); }
 89 |   // Writes the element count, then the elements, and moves *buffer past both.
 90 |   static void serialize(void** buffer, std::vector<T> const& value) {
 91 |     serialize_value(buffer, value.size());
 92 |     size_t nbyte = value.size() * sizeof(T);
 93 |     if (nbyte != 0) ::memcpy(*buffer, value.data(), nbyte);  // empty vector: data() may be null, so skip memcpy
 94 |     reinterpret_cast<char*&>(*buffer) += nbyte;
 95 |   }
 96 |   // Reads the count and the elements into *value, advancing *buffer and shrinking *buffer_size.
 97 |   static void deserialize(void const** buffer, size_t* buffer_size, std::vector<T>* value) {
 98 |     size_t size;
 99 |     deserialize_value(buffer, buffer_size, &size);
100 |     // Check the count BEFORE resize(): a corrupt count must not cause a huge allocation or wrap size * sizeof(T).
101 |     assert(size <= *buffer_size / sizeof(T));
102 |     value->resize(size);
103 |     size_t nbyte = size * sizeof(T);
104 |     if (nbyte != 0) ::memcpy(value->data(), *buffer, nbyte);
105 |     reinterpret_cast<char const*&>(*buffer) += nbyte;
106 |     *buffer_size -= nbyte;
107 |   }
108 | };
109 | 
110 | } // namespace
111 | 
112 | // Number of bytes serialize_value() writes for `value`; forwards to Serializer<T>.
113 | template<typename T> inline size_t serialized_size(T const& value) {
114 |   return Serializer<T>::serialized_size(value);
115 | }
116 | 
117 | // Writes `value` at *buffer and advances *buffer past it; forwards to Serializer<T>.
118 | template<typename T> inline void serialize_value(void** buffer, T const& value) {
119 |   Serializer<T>::serialize(buffer, value);
120 | }
121 | 
122 | // Reads a T from *buffer into *value, updating *buffer and *buffer_size; forwards to Serializer<T>.
123 | template<typename T> inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) {
124 |   Serializer<T>::deserialize(buffer, buffer_size, value);
125 | }
126 | 


--------------------------------------------------------------------------------
/src/onnx_parser/toposort.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include <vector>
26 | #include <unordered_map>
27 | 
28 | #include <iostream>
29 | using std::cout;
30 | using std::cerr;
31 | using std::endl;
32 | 
33 | namespace {
34 | 
35 | enum NodeState { NODE_UNVISITED, NODE_ACTIVE, NODE_VISITED }; // DFS marks: not reached yet / currently being expanded / already appended to the order
36 | 
37 | // Depth-first visit of node `node_idx`: first the nodes its inputs map to (looked up by name in
38 | // node_map), then the node itself is appended to *order.
39 | // Returns false (after writing an error to cerr) when a cycle is detected, true otherwise.
40 | template<class Container>
41 | bool get_post_order(size_t node_idx,
42 |                     Container const& nodes,
43 |                     std::unordered_map<std::string, size_t> const& node_map,
44 |                     std::vector<NodeState>* node_states,
45 |                     std::vector<size_t>* order) {
46 |   NodeState& state = node_states->at(node_idx);
47 |   if( state == NODE_VISITED ) {
48 |     return true; // Already appended to *order by an earlier visit
49 |   }
50 |   if( state == NODE_ACTIVE ) {
51 |     // Cycle detected! The node was reached again while still being expanded.
52 |     cerr << "ERROR: Graph contains a cycle" << endl;
53 |     return false;
54 |   }
55 |   state = NODE_ACTIVE;
56 |   // TODO: This .Get().input() is highly specific to protobuf, should
57 |   //       generalise it somehow.
58 |   for( auto const& input_name : nodes.Get(node_idx).input() ) {
59 |     auto producer = node_map.find(input_name);
60 |     if( producer == node_map.end() ) {
61 |       continue; // Skip missing input edges (names absent from node_map)
62 |     }
63 |     if( !get_post_order(producer->second, nodes, node_map, node_states, order) ) {
64 |       return false;
65 |     }
66 |   }
67 |   // Every input has been handled, so this node can follow them in the order.
68 |   state = NODE_VISITED;
69 |   order->push_back(node_idx);
70 |   return true;
71 | }
72 | 
73 | } // anonymous namespace
74 | 
75 | // Appends to *order the indices of `nodes` so that each node follows the nodes producing its inputs.
76 | // Returns false (after writing an error to cerr) on a duplicated output name or a cycle.
77 | template<class Container>
78 | bool toposort(Container const& nodes, std::vector<size_t>* order) {
79 |   const size_t node_count = (size_t)nodes.size();
80 |   std::unordered_map<std::string, size_t> producer_of; // output name -> index of the node producing it
81 |   for( size_t idx=0; idx<node_count; ++idx ) {
82 |     // TODO: This .Get().input() is highly specific to protobuf, should
83 |     //       generalise it somehow.
84 |     for( auto const& output : nodes.Get(idx).output() ) {
85 |       if( producer_of.emplace(output, idx).second ) continue;
86 |       // Output name appears more than once in graph!
87 |       cerr << "ERROR: Output name is not unique: " << output << endl;
88 |       return false;
89 |     }
90 |   }
91 |   order->reserve(node_count);
92 |   std::vector<NodeState> states(node_count, NODE_UNVISITED);
93 |   // Start a visit from every node; nodes already reached through an earlier one return immediately.
94 |   for( size_t idx=0; idx<node_count; ++idx ) {
95 |     if( !get_post_order(idx, nodes, producer_of, &states, order) ) return false;
96 |   }
97 |   return true;
98 | }
99 | 


--------------------------------------------------------------------------------
/src/onnx_parser/utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #pragma once
24 | 
25 | #include <string>  // std::string is named below; do not rely on another header pulling it in
26 | #include <unordered_map>
27 | template<typename T>  // string_map<T>: unordered map from std::string keys to T values
28 | using string_map = std::unordered_map<std::string, T>;
29 | 


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/DCNv2.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef DCNv2_HPP
 3 | #define DCNv2_HPP
 4 | 
 5 | #include <onnxplugin/onnxplugin.hpp>
 6 | #include <cublas_v2.h>
 7 | 
 8 | using namespace ONNXPlugin;
 9 | 
10 | class DCNv2 : public TRTPlugin {  // TRTPlugin subclass named DCNv2; the member functions are defined outside this header
11 | public:
12 | 	SetupPlugin(DCNv2);
13 | 	// Every member function below overrides a TRTPlugin virtual and is marked `override` so a signature drift fails to compile.
14 | 	virtual int initialize() override;
15 | 	virtual void terminate() override;
16 | 	virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
17 | 	virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override;
18 | 	virtual size_t getWorkspaceSize(int maxBatchSize) const override;
19 | 	virtual int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
20 | private:
21 | 	cublasHandle_t cublasHandle_ = nullptr;  // cuBLAS handle; starts null (presumably created in initialize() -- confirm in the .cu file)
22 | };
23 | 
24 | #endif //DCNv2_HPP


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/HSigmoid.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "HSigmoid.hpp"
 4 | 
 5 | typedef TRTInfer::halfloat halfloat;
 6 | 
 7 | // Element-wise hard sigmoid: a = input[i] + 3, clamped to [0, 6]; output[i] = a / 6.
 8 | // Only the float and halfloat specializations below are defined.
 9 | template<typename _T>
10 | __global__ void HSigmoidKernel(_T* input, _T* output, int edge);
11 | 
12 | template<>
13 | __global__ void HSigmoidKernel(float* input, float* output, int edge) {
14 | 	// Project macro (defined outside this file); it provides `position` -- presumably bounded by `edge`, confirm there.
15 | 	KERNEL_POSITION;
16 | 	float shifted = input[position] + 3;
17 | 	if (shifted < 0) shifted = 0;
18 | 	else if (shifted >= 6) shifted = 6;
19 | 	output[position] = shifted / 6;
20 | }
21 | 
22 | template<>
23 | __global__ void HSigmoidKernel(halfloat* input, halfloat* output, int edge) {
24 | 	KERNEL_POSITION;
25 | 	// Same computation as the float version, with the constants held as halfloat.
26 | 	halfloat zero = 0.0f;
27 | 	halfloat six = 6.0f;
28 | 	halfloat shifted = input[position] + halfloat(3.0f);
29 | 	if (shifted < zero) shifted = zero;
30 | 	else if (shifted >= six) shifted = six;
31 | 	output[position] = shifted / six;
32 | }
33 | 
34 | void HSigmoidConfig::init(){ // No-op: the only statements are disabled debug logging of the info string and weight count.
35 |     //INFO("init HSigmoid config: %s", info_.c_str());
36 |     //INFO("weights = %d", this->weights_.size());
37 | }
38 | 
39 | nvinfer1::Dims HSigmoid::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
40 | 	return *inputDims;  // the output shape is the shape of input 0; `index` and `nbInputDims` are not consulted
41 | }
42 | 
43 | // Builds the layer config for this plugin and records which tensor data types it accepts.
44 | std::shared_ptr<LayerConfig> HSigmoid::config(const std::string& layerName) {
45 | 	std::shared_ptr<LayerConfig> cfg(new HSigmoidConfig());
46 | 	// Declare that this plugin supports both half and float formats.
47 | 	cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
48 | 	//cfg->supportDataType_ = {nvinfer1::DataType::kHALF};
49 | 	return cfg;
50 | }
51 | 
52 | int HSigmoid::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
53 | 	// Applies HSigmoidKernel to every element of input 0 and writes output 0; always returns 0.
54 | 	int count = inputs[0].count();
55 | 	auto grid = gridDims(count);
56 | 	auto block = blockDims(count);
57 | 	// Launch on the caller-supplied `stream` (not the default stream) so the kernel is ordered with the caller's other work.
58 | 	if (config_->configDataType_ == TRTInfer::DataType::dtFloat) {
59 | 		HSigmoidKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count);
60 | 	}
61 | 	else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) {
62 | 		HSigmoidKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count);
63 | 	}
64 | 	return 0;  // any other configured data type launches nothing
65 | }
66 | 
67 | RegisterPlugin(HSigmoid);


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/HSigmoid.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef HSIGMOID_HPP
 3 | #define HSIGMOID_HPP
 4 | // Guard macro is named after this header so it cannot clash with another plugin header's guard.
 5 | #include <onnxplugin/onnxplugin.hpp>
 6 | 
 7 | using namespace ONNXPlugin;
 8 | // Layer configuration for HSigmoid; init() is defined in HSigmoid.cu.
 9 | class HSigmoidConfig : public LayerConfig{
10 | public:
11 | 	virtual void init() override;
12 | };
13 | // Hard-sigmoid plugin; the member functions are defined in HSigmoid.cu.
14 | class HSigmoid : public TRTPlugin {
15 | public:
16 | 	SetupPlugin(HSigmoid);
17 | 	// Creates the HSigmoidConfig and declares the supported data types.
18 | 	virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
19 | 
20 | 	// This plugin has a single output whose shape equals that of input 0, so input 0's shape is returned.
21 | 	virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override;
22 | 
23 | 	// Execution: launches the kernel matching the configured data type.
24 | 	int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
25 | };
26 | 
27 | #endif //HSIGMOID_HPP


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/HSwish.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "HSwish.hpp"
 4 | 
 5 | typedef TRTInfer::halfloat halfloat;
 6 | 
 7 | // Element-wise hard swish: with x = input[i] and a = x + 3 clamped to [0, 6], output[i] = x * a / 6.
 8 | // Only the float and halfloat specializations below are defined.
 9 | template<typename _T>
10 | __global__ void HSwishKernel(_T* input, _T* output, int edge);
11 | 
12 | template<>
13 | __global__ void HSwishKernel(float* input, float* output, int edge) {
14 | 	// Project macro (defined outside this file); it provides `position` -- presumably bounded by `edge`, confirm there.
15 | 	KERNEL_POSITION;
16 | 	float x = input[position];
17 | 	float gate = x + 3;
18 | 	if (gate < 0) gate = 0;
19 | 	else if (gate >= 6) gate = 6;
20 | 	output[position] = x * gate / 6;
21 | }
22 | 
23 | template<>
24 | __global__ void HSwishKernel(halfloat* input, halfloat* output, int edge) {
25 | 	KERNEL_POSITION;
26 | 	halfloat zero = 0.0f, six = 6.0f;  // clamp bounds held as halfloat
27 | 	halfloat x = input[position];
28 | 	halfloat gate = x + halfloat(3.0f);
29 | 	if (gate < zero) gate = zero;
30 | 	else if (gate >= six) gate = six;
31 | 	output[position] = x * gate / six;
32 | }
33 | 
34 | void HSwishConfig::init(){ // No-op: the only statements are disabled debug logging of the info string and weight count.
35 |     //INFO("init hswish config: %s", info_.c_str());
36 |     //INFO("weights = %d", this->weights_.size());
37 | }
38 | 
39 | nvinfer1::Dims HSwish::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
40 | 	return *inputDims;  // the output shape is the shape of input 0; `index` and `nbInputDims` are not consulted
41 | }
42 | 
43 | // Builds the layer config for this plugin and records which tensor data types it accepts.
44 | std::shared_ptr<LayerConfig> HSwish::config(const std::string& layerName) {
45 | 	std::shared_ptr<LayerConfig> cfg(new HSwishConfig());
46 | 	// Declare that this plugin supports both half and float formats.
47 | 	cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
48 | 	//cfg->supportDataType_ = {nvinfer1::DataType::kHALF};
49 | 	return cfg;
50 | }
51 | 
52 | int HSwish::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
53 | 	// Applies HSwishKernel to every element of input 0 and writes output 0; always returns 0.
54 | 	int count = inputs[0].count();
55 | 	auto grid = gridDims(count);
56 | 	auto block = blockDims(count);
57 | 	// Launch on the caller-supplied `stream` (not the default stream) so the kernel is ordered with the caller's other work.
58 | 	if (config_->configDataType_ == TRTInfer::DataType::dtFloat) {
59 | 		HSwishKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count);
60 | 	}
61 | 	else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) {
62 | 		HSwishKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), count);
63 | 	}
64 | 	return 0;  // any other configured data type launches nothing
65 | }
66 | 
67 | RegisterPlugin(HSwish);


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/HSwish.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef HSWISH_HPP
 3 | #define HSWISH_HPP
 4 | // Guard macro is named after this header so it cannot clash with another plugin header's guard.
 5 | #include <onnxplugin/onnxplugin.hpp>
 6 | 
 7 | using namespace ONNXPlugin;
 8 | // Layer configuration for HSwish; init() is defined in HSwish.cu.
 9 | class HSwishConfig : public LayerConfig{
10 | public:
11 | 	virtual void init() override;
12 | };
13 | // Hard-swish plugin; the member functions are defined in HSwish.cu.
14 | class HSwish : public TRTPlugin {
15 | public:
16 | 	SetupPlugin(HSwish);
17 | 	// Creates the HSwishConfig and declares the supported data types.
18 | 	virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
19 | 
20 | 	// This plugin has a single output whose shape equals that of input 0, so input 0's shape is returned.
21 | 	virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override;
22 | 
23 | 	// Execution: launches the kernel matching the configured data type.
24 | 	int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
25 | };
26 | 
27 | #endif //HSWISH_HPP


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/MReLU.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include "MReLU.hpp"
 4 | #include <json.hpp>
 5 | 
 6 | typedef TRTInfer::halfloat halfloat;
 7 | 
 8 | // Element-wise ReLU plus a scalar: output[i] = (input[i] > 0 ? input[i] : 0) + bias.
 9 | // Only the float and halfloat specializations below are defined.
10 | template<typename _T>
11 | __global__ void MReLUKernel(_T* input, _T* output, _T bias, int edge);
12 | 
13 | template<>
14 | __global__ void MReLUKernel(float* input, float* output, float bias, int edge) {
15 | 	// Project macro (defined outside this file); it provides `position` -- presumably bounded by `edge`, confirm there.
16 | 	KERNEL_POSITION;
17 | 	float x = input[position];
18 | 	float rectified = 0;
19 | 	if (x > 0) rectified = x;
20 | 	output[position] = rectified + bias;
21 | }
22 | 
23 | template<>
24 | __global__ void MReLUKernel(halfloat* input, halfloat* output, halfloat bias, int edge) {
25 | 	KERNEL_POSITION;
26 | 	halfloat zero = 0.0f;
27 | 	halfloat x = input[position];
28 | 	if (!(x > zero)) x = zero;  // keep x only when it is strictly positive
29 | 	output[position] = x + bias;
30 | }
31 | 
32 | void MReLUConfig::init(){  // Logs the raw info string, the weights, and three fields parsed from the info JSON.
33 |     INFO("init MReLU config: %s", info_.c_str());
34 | 	// %d expects an int, so the size() result is cast; weights_[0] is only touched when at least one weight exists.
35 | 	if(this->weights_.size() > 0){ INFO("MReLU weights = %d[%s]", (int)this->weights_.size(), this->weights_[0]->shapeString()); }
36 | 	Json::Value value;
37 | 	if(Json::Reader().parse(info_, value)){  // the fields are logged only when info_ parses as JSON
38 | 		INFO("MReLU kernel_size: %d", value["kernel_size"].asInt());
39 | 		INFO("MReLU eps: %g", value["eps"].asFloat());
40 | 		INFO("MReLU other: %s", value["other"].asCString());
41 | 	}
42 | }
43 | 
44 | nvinfer1::Dims MReLU::outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) {
45 | 	return *inputDims;  // the output shape is the shape of input 0; `index` and `nbInputDims` are not consulted
46 | }
47 | 
48 | // Builds the layer config for this plugin and records which tensor data types it accepts.
49 | std::shared_ptr<LayerConfig> MReLU::config(const std::string& layerName) {
50 | 	std::shared_ptr<LayerConfig> cfg(new MReLUConfig());
51 | 	// Declare that this plugin supports both half and float formats.
52 | 	cfg->supportDataType_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT};
53 | 	//cfg->supportDataType_ = {nvinfer1::DataType::kHALF};
54 | 	return cfg;
55 | }
56 | 
57 | int MReLU::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
58 | 	// Applies MReLUKernel to every element of input 0 and writes output 0; always returns 0.
59 | 	int count = inputs[0].count();
60 | 	auto grid = gridDims(count);
61 | 	auto block = blockDims(count);
62 | 	float bias = *this->config_->weights_[0]->cpu<float>();  // scalar bias: first float of the config's weight 0 (presumably its host copy)
63 | 	// Launch on the caller-supplied `stream` (not the default stream) so the kernel is ordered with the caller's other work.
64 | 	if (config_->configDataType_ == TRTInfer::DataType::dtFloat) {
65 | 		MReLUKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), bias, count);
66 | 	}
67 | 	else if (config_->configDataType_ == TRTInfer::DataType::dtHalfloat) {
68 | 		MReLUKernel <<<grid, block, 0, stream>>> (inputs[0].ptr<halfloat>(), outputs[0].ptr<halfloat>(), halfloat(bias), count);
69 | 	}
70 | 	return 0;  // any other configured data type launches nothing
71 | }
72 | 
73 | RegisterPlugin(MReLU);


--------------------------------------------------------------------------------
/src/onnxplugin/plugins/MReLU.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef MRELU_HPP
 3 | #define MRELU_HPP
 4 | // Guard macro is named after this header so it cannot clash with another plugin header's guard.
 5 | #include <onnxplugin/onnxplugin.hpp>
 6 | 
 7 | using namespace ONNXPlugin;
 8 | // Layer configuration for MReLU; init() is defined in MReLU.cu.
 9 | class MReLUConfig : public LayerConfig{
10 | public:
11 | 	virtual void init() override;
12 | };
13 | // ReLU-plus-bias plugin; the member functions are defined in MReLU.cu.
14 | class MReLU : public TRTPlugin {
15 | public:
16 | 	SetupPlugin(MReLU);
17 | 	// Creates the MReLUConfig and declares the supported data types.
18 | 	virtual std::shared_ptr<LayerConfig> config(const std::string& layerName) override;
19 | 
20 | 	// This plugin has a single output whose shape equals that of input 0, so input 0's shape is returned.
21 | 	virtual nvinfer1::Dims outputDims(int index, const nvinfer1::Dims* inputDims, int nbInputDims) override;
22 | 
23 | 	// Execution: launches the kernel matching the configured data type.
24 | 	int enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) override;
25 | };
26 | 
27 | #endif //MRELU_HPP


--------------------------------------------------------------------------------
/workspace/imgs/000020.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/000020.jpg


--------------------------------------------------------------------------------
/workspace/imgs/000023.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/000023.jpg


--------------------------------------------------------------------------------
/workspace/imgs/17790319373_bd19b24cfc_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/17790319373_bd19b24cfc_k.jpg


--------------------------------------------------------------------------------
/workspace/imgs/selfie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/selfie.jpg


--------------------------------------------------------------------------------
/workspace/imgs/www.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/imgs/www.jpg


--------------------------------------------------------------------------------
/workspace/logs/2020-04-15.log:
--------------------------------------------------------------------------------
 1 | W[2020-04-15 21:47:53:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:19]:onnx to trtmodel...
 2 | W[2020-04-15 21:47:53:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:96]:Build FP32 trtmodel.
 3 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:33]:init MReLU config: {"kernel_size": 3, "eps": 0.03, "other": "Hello Onnx Plugin"}
 4 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:34]:MReLU weights = 1[1 x 1 x 1 x 1]
 5 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:38]:MReLU kernel_size: 3
 6 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:39]:MReLU eps: 0.03
 7 | I[2020-04-15 21:47:54:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:40]:MReLU other: Hello Onnx Plugin
 8 | I[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:200]:input shape: 3 x 5 x 5
 9 | W[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:201]:Set max batch size: 4
10 | W[2020-04-15 21:47:54:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:206]:Build engine
11 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:26]:done.
12 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:28]:load model: models/demo.fp32.trtmodel
13 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:33]:init MReLU config: {"kernel_size": 3, "eps": 0.03, "other": "Hello Onnx Plugin"}
14 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:34]:MReLU weights = 1[1 x 1 x 1 x 1]
15 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:38]:MReLU kernel_size: 3
16 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:39]:MReLU eps: 0.03
17 | I[2020-04-15 21:47:55:G:/code/TensorRT/tensorRTIntegrate/src/onnxplugin/plugins/MReLU.cu:40]:MReLU other: Hello Onnx Plugin
18 | W[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\builder\trt_builder.cpp:35]:NVInfer WARNING: Current optimization profile is: 0. Please ensure there are no enqueued operations pending in this context prior to switching profiles
19 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:35]:forward...
20 | I[2020-04-15 21:47:55:g:\code\tensorrt\tensorrtintegrate\src\examples\onnx.cpp:42]:done.
21 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\dbface.cpp:140]:models/dbface.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dbface.onnx
22 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_net_coco2x_dcn.cpp:121]:onnx to trtmodel...
23 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_net_coco2x_dcn.cpp:132]:models/dladcnv2.onnx not found, download url: http://zifuture.com:1000/fs/public_models/dladcnv2.onnx or use centerNetDLADCNOnnX/dladcn_export_onnx.py to generate
24 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_track_coco_tracking.cpp:131]:onnx to trtmodel...
25 | W[2020-04-15 21:47:56:g:\code\tensorrt\tensorrtintegrate\src\examples\center_track_coco_tracking.cpp:139]:models/coco_tracking.onnx not found, download url: http://zifuture.com:1000/fs/public_models/coco_tracking.onnx
26 | 


--------------------------------------------------------------------------------
/workspace/models/.gitignore:
--------------------------------------------------------------------------------
1 | dladcnv2.onnx
2 | dbface.onnx
3 | coco_tracking.onnx
4 | *.trtmodel


--------------------------------------------------------------------------------
/workspace/models/demo.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/models/demo.onnx


--------------------------------------------------------------------------------
/workspace/results/0.centernet.coco2x.dcn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/0.centernet.coco2x.dcn.jpg


--------------------------------------------------------------------------------
/workspace/results/1.centernet.coco2x.dcn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/1.centernet.coco2x.dcn.jpg


--------------------------------------------------------------------------------
/workspace/results/coco.tracking.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/coco.tracking.jpg


--------------------------------------------------------------------------------
/workspace/results/selfie.draw.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlunion/tensorRTIntegrate/145aec3faeef0d761a8f2752951deede2ed661a6/workspace/results/selfie.draw.jpg


--------------------------------------------------------------------------------