├── .gitignore ├── .vscode └── launch.json ├── CMakeLists.txt ├── CMakeLists.txt.user ├── README.md ├── SourceMnist.cpp ├── cublasTest.cpp ├── data └── tabby_tiger_cat.jpg ├── einsum ├── CMakeLists.txt ├── Einsum.cpp ├── Einsum.h ├── build │ ├── .cmake │ │ └── api │ │ │ └── v1 │ │ │ ├── query │ │ │ └── client-vscode │ │ │ │ └── query.json │ │ │ └── reply │ │ │ ├── cache-v2-8b57e125de1d3e63be5c.json │ │ │ ├── codemodel-v2-91dcb8fb09f012c6e06f.json │ │ │ ├── index-2021-06-27T04-48-55-0999.json │ │ │ ├── target-einsum_common8_lib-Debug-3718d237bae652e49c1b.json │ │ │ ├── target-einsumlib-Debug-0f07c3f3305b026cdd92.json │ │ │ └── toolchains-v1-7e03fe365e7edf64e679.json │ ├── CMakeCache.txt │ ├── CMakeFiles │ │ ├── 3.20.5 │ │ │ ├── CMakeCCompiler.cmake │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ ├── CMakeSystem.cmake │ │ │ ├── CompilerIdC │ │ │ │ ├── CMakeCCompilerId.c │ │ │ │ └── a.out │ │ │ └── CompilerIdCXX │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ └── a.out │ │ ├── CMakeDirectoryInformation.cmake │ │ ├── CMakeError.log │ │ ├── CMakeOutput.log │ │ ├── Progress │ │ │ ├── 2 │ │ │ ├── 5 │ │ │ └── count.txt │ │ ├── TargetDirectories.txt │ │ ├── cmake.check_cache │ │ ├── einsum.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── cmake_clean_target.cmake │ │ │ ├── compiler_depend.internal │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ └── progress.make │ │ ├── einsumlib.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── cmake_clean_target.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ └── progress.make │ │ ├── progress.marks │ │ └── test.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── 
build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── onnx2trt_gcn.cpp.o.d │ │ │ └── progress.make │ ├── cmake_install.cmake │ ├── common │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── commonlib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── cudaDriverWrapper.cpp.o.d │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── nmsHelper.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ │ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ ├── compile_commands.json │ ├── einsum_common7 │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── einsum_common7_lib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.internal │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── cudaDriverWrapper.cpp.o.d │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── nmsHelper.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ 
│ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ ├── einsum_common8 │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── einsum_common8_lib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ │ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ └── test ├── einsum_common7 │ ├── BatchStream.h │ ├── CMakeLists.txt │ ├── EntropyCalibrator.h │ ├── ErrorRecorder.h │ ├── argsParser.h │ ├── bboxUtils.h │ ├── bertCommon.h │ ├── buffers.h │ ├── checkMacrosPlugin.cpp │ ├── checkMacrosPlugin.h │ ├── common.cuh │ ├── common.h │ ├── cub_helper.h │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── getOptions.cpp │ ├── getOptions.h │ ├── half.h │ ├── kernels │ │ ├── CMakeLists.txt │ │ ├── allClassNMS.cu │ │ ├── bboxDeltas2Proposals.cu │ │ ├── common.cu │ │ ├── cropAndResizeKernel.cu │ │ ├── decodeBBoxes.cu │ │ ├── detectionForward.cu │ │ ├── extractFgScores.cu │ │ ├── gatherTopDetections.cu │ │ ├── generateAnchors.cu │ │ ├── gridAnchorLayer.cu │ │ ├── kernel.cpp │ │ ├── kernel.h │ │ ├── lReLU.cu │ │ ├── maskRCNNKernels.cu │ │ ├── maskRCNNKernels.h │ │ ├── nmsLayer.cu │ │ ├── normalizeLayer.cu │ │ ├── permuteData.cu │ │ ├── priorBoxLayer.cu │ │ ├── proposalKernel.cu │ │ ├── proposalsForward.cu │ │ ├── reducedMathPlugin.h │ │ ├── regionForward.cu 
│ │ ├── reorgForward.cu │ │ ├── roiPooling.cu │ │ ├── rproiInferenceFused.cu │ │ ├── sortScoresPerClass.cu │ │ └── sortScoresPerImage.cu │ ├── logger.cpp │ ├── logger.h │ ├── logging.h │ ├── nmsHelper.cpp │ ├── nmsUtils.h │ ├── parserOnnxConfig.h │ ├── plugin.h │ ├── pluginLogger.h │ ├── pluginLogging.h │ ├── reducedMathPlugin.cpp │ ├── sampleConfig.h │ ├── sampleDevice.h │ ├── sampleEngines.cpp │ ├── sampleEngines.h │ ├── sampleInference.cpp │ ├── sampleInference.h │ ├── sampleOptions.cpp │ ├── sampleOptions.h │ ├── sampleReporting.cpp │ ├── sampleReporting.h │ ├── sampleUtils.h │ └── serialize.hpp └── einsum_common8 │ ├── BatchStream.h │ ├── CMakeLists.txt │ ├── EntropyCalibrator.h │ ├── ErrorRecorder.h │ ├── argsParser.h │ ├── bboxUtils.h │ ├── bertCommon.h │ ├── buffers.h │ ├── checkMacrosPlugin.cpp │ ├── checkMacrosPlugin.h │ ├── common.cuh │ ├── common.h │ ├── cub_helper.h │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── getOptions.cpp │ ├── getOptions.h │ ├── half.h │ ├── kernels │ ├── CMakeLists.txt │ ├── allClassNMS.cu │ ├── bboxDeltas2Proposals.cu │ ├── common.cu │ ├── cropAndResizeKernel.cu │ ├── decodeBBoxes.cu │ ├── detectionForward.cu │ ├── extractFgScores.cu │ ├── gatherTopDetections.cu │ ├── generateAnchors.cu │ ├── gridAnchorLayer.cu │ ├── kernel.cpp │ ├── kernel.h │ ├── lReLU.cu │ ├── maskRCNNKernels.cu │ ├── maskRCNNKernels.h │ ├── nmsLayer.cu │ ├── normalizeLayer.cu │ ├── permuteData.cu │ ├── priorBoxLayer.cu │ ├── proposalKernel.cu │ ├── proposalsForward.cu │ ├── reducedMathPlugin.h │ ├── regionForward.cu │ ├── reorgForward.cu │ ├── roiPooling.cu │ ├── rproiInferenceFused.cu │ ├── sortScoresPerClass.cu │ └── sortScoresPerImage.cu │ ├── logger.cpp │ ├── logger.h │ ├── logging.h │ ├── nmsHelper.cpp │ ├── nmsUtils.h │ ├── parserOnnxConfig.h │ ├── plugin.h │ ├── pluginLogger.h │ ├── pluginLogging.h │ ├── reducedMathPlugin.cpp │ ├── safeCommon.h │ ├── sampleConfig.h │ ├── sampleDevice.h │ ├── sampleEngines.cpp │ ├── sampleEngines.h │ ├── 
sampleInference.cpp │ ├── sampleInference.h │ ├── sampleOptions.cpp │ ├── sampleOptions.h │ ├── sampleReporting.cpp │ ├── sampleReporting.h │ ├── sampleUtils.h │ └── serialize.hpp ├── function ├── CMakeLists.txt ├── TrtInfer.cpp ├── function.cpp └── image.cpp ├── gcn.onnx ├── generate_onnx.py ├── include ├── Head.h ├── TrtInfer.h ├── function.h └── image.hpp ├── infer_test.cpp ├── mnist.cpp ├── mytest.cpp ├── onnx2trt.cpp ├── onnx2trt_gcn.cpp ├── resnet50.cpp ├── sample.cpp ├── temp.cpp └── testNet.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # This file is used to ignore files which are generated 2 | # ---------------------------------------------------------------------------- 3 | *bin/* 4 | *build/* 5 | /data/ 6 | !/data/*.jpg 7 | *~ 8 | *.autosave 9 | *.a 10 | *.core 11 | *.moc 12 | *.o 13 | *.obj 14 | *.orig 15 | *.rej 16 | *.so 17 | *.so.* 18 | *_pch.h.cpp 19 | *_resource.rc 20 | *.qm 21 | .#* 22 | *.*# 23 | core 24 | !core/ 25 | tags 26 | .DS_Store 27 | .directory 28 | *.debug 29 | Makefile* 30 | *.prl 31 | *.app 32 | moc_*.cpp 33 | ui_*.h 34 | qrc_*.cpp 35 | Thumbs.db 36 | *.res 37 | *.rc 38 | /.qmake.cache 39 | /.qmake.stash 40 | 41 | # qtcreator generated files 42 | *.pro.user* 43 | 44 | # xemacs temporary files 45 | *.flc 46 | 47 | # Vim temporary files 48 | .*.swp 49 | 50 | # Visual Studio generated files 51 | *.ib_pdb_index 52 | *.idb 53 | *.ilk 54 | *.pdb 55 | *.sln 56 | *.suo 57 | *.vcproj 58 | *vcproj.*.*.user 59 | *.ncb 60 | *.sdf 61 | *.opensdf 62 | *.vcxproj 63 | *vcxproj.* 64 | 65 | # MinGW generated files 66 | *.Debug 67 | *.Release 68 | 69 | # Python byte code 70 | *.pyc 71 | 72 | # Binaries 73 | # -------- 74 | *.dll 75 | *.exe 76 | 77 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 
欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "(gdb) 启动", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/bin/onnx2trt_gcn", 12 | "args": [], 13 | "stopAtEntry": false, 14 | "cwd": "${fileDirname}", 15 | "environment": [], 16 | "externalConsole": false, 17 | "MIMode": "gdb", 18 | "setupCommands": [ 19 | { 20 | "description": "为 gdb 启用整齐打印", 21 | "text": "-enable-pretty-printing", 22 | "ignoreFailures": true 23 | } 24 | ] 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(TensorRT) 4 | 5 | set(CMAKE_BUILD_TYPE Debug) 6 | #set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -o0 -Wall -g -ggdb") 7 | set(CMAKE_CXX_STANDARD 11) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | 10 | set(OpenCV_ROOT "/usr/local/opencv4.4.0/lib/cmake/opencv4") 11 | set(OpenCV_DIR "/usr/local/opencv4.4.0/lib/cmake/opencv4") 12 | find_package(OpenCV REQUIRED) 13 | INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) 14 | LINK_LIBRARIES(${OpenCV_LIBS}) 15 | 16 | add_subdirectory(einsum) 17 | LINK_LIBRARIES(einsumlib) 18 | 19 | INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}) # 设定h文件路径 20 | add_subdirectory(function function) 21 | LINK_LIBRARIES(functionlib) 22 | 23 | SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 24 | #add_executable(infer_test "infer_test.cpp") 25 | #add_executable(testNet "testNet.cpp") 26 | #add_executable(mnist "mnist.cpp") 27 | #add_executable(resnet50 "resnet50.cpp") 28 | #add_executable(SourceMnist "SourceMnist.cpp") 29 | #add_executable(onnx2trt "onnx2trt.cpp") 30 | add_executable(onnx2trt_gcn "onnx2trt_gcn.cpp") 31 | #add_executable(sample "sample.cpp") 32 | #add_executable(temp "sample.cpp") 33 | 34 | ######################################### 
35 | # test 36 | #add_executable(cublasTest "cublasTest.cpp") 37 | #target_link_libraries(cublasTest cublas) 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 前言 2 | 3 | 由于TensorRT并未实现Einsum插件,而在转换GCN的过程中,网络频繁使用该op,因此,不得已手写plugin,顺便也学习了一下plugin的编写及注册方法。本仓库也旨在演示如何自定义编写的plugin。 4 | 5 | 在plugin文件内,对每个成员函数的功能都进行了简单的注释(可能会有很多不清楚的地方,建议自行百度详细了解一下) 6 | 7 | 强烈推荐这个教程[实现TensorRT自定义插件(plugin)自由](https://zhuanlan.zhihu.com/p/297002406),按照这个教程肯定可以生成可以用的Plugin,目前网上搜到的都是直接重新编译生成新的`libnvinfer_plugin.so`替换官方原有的该库,然而这种方法在TensorRT-OSS版本不匹配TensorRT时就不能使用了,因此这里采用另一种更加灵活的方法:**直接将EinsumPlugin编译到自己的项目工程里即可**。 8 | 9 | 本仓库**只实现了`nctkv,kvw->nctw`操作**,其他结构也都类似,自行修改插件,制作适合自己版本的即可。 10 | 11 | ## 环境 12 | 13 | > TensorRT8.0 14 | 15 | TensorRT8.0相比以前版本的最大区别就是该版本的plugin必须在每个成员函数之后增加一个`throw()`,看`Einsum.cpp`程序就懂了。TensorRT7.0也可以使用,不需要修改任何代码。 16 | 17 | 如果想要使用TensorRT7,直接在`einsum/CMakeLists.txt`内修改TensorRT路径,并切换到`einsum_common7`即可 18 | 19 | ## 使用流程 20 | 21 | 参考根目录下的`CMakeLists.txt`将einsum添加到自己的项目之中即可。 22 | 23 | ==记得一定要在模型解析的源文件内(如这里的onnx2trt_gcn.cpp),使用`REGISTER_TENSORRT_PLUGIN(EinsumCreator)`来注册Einsum插件,这样解析时才可以找到== 24 | 25 | **测试用例使用方法** 26 | 27 | 1. 运行`generate_onnx.py`生成测试的onnx文件 28 | 2. 编译该仓库,然后运行`onnnx2trt_gcn`,如果没报错就说明该einsum插件没有问题 29 | 30 | ## 编写流程 31 | 32 | 1. 从TensorRT-OSS的plugin文件内,拷贝一个官方的例程,然后直接在里面的编写就可以了,替换掉对应函数的内容即可 33 | 2. 
关于自定义Plugin的依赖库问题,他主要依赖`TensorRT-OSS/plugin/common`和`TensorRT-(版本号)/samples/common`下的库,因此要将对应版本的`TensorRT-OSS和TensorRT`该路径下的文件全部复制到一个文件夹下(如本repo中的`einsum/einsum_common*`文件夹内),然后使用cmake编译生成对应的库,并将该库链接到`einsum.cpp`即可 34 | 35 | ## 细节 36 | 37 | 由于本仓库的示例中与EinsumPlugin的插件依赖库相同,因此采用`target_link_libraries(PUBLIC)`将库向上连接到示例程序`onnx2trt_gcn` 38 | 39 | 根目录下只会使用`onnx2trt_gcn.cpp`,其他的cpp文件不需要,可以删除。 -------------------------------------------------------------------------------- /SourceMnist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "NvInferRuntime.h" 3 | #include "NvInferRuntimeCommon.h" 4 | #include "NvOnnxConfig.h" 5 | #include "NvOnnxParser.h" 6 | #include "NvUtils.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace nvinfer1; 14 | 15 | class Logger : public ILogger 16 | { 17 | void log(Severity severity, const char* msg) override 18 | { 19 | // suppress info-level messages 20 | if (severity != Severity::kINFO) 21 | std::cout << msg << std::endl; 22 | } 23 | } gLogger; 24 | 25 | void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) 26 | { 27 | std::ifstream infile(fileName, std::ifstream::binary); 28 | std::string magic, h, w, max; 29 | infile >> magic >> h >> w >> max; 30 | infile.seekg(1, infile.cur); 31 | infile.read(reinterpret_cast(buffer), inH * inW); 32 | } 33 | 34 | struct InferDeleter 35 | { 36 | template 37 | void operator()(T* obj) const{ 38 | if (obj) 39 | { 40 | obj->destroy(); 41 | } 42 | } 43 | }; 44 | 45 | struct CudaDeleter 46 | { 47 | void operator()(void* obj){ 48 | if (obj) 49 | { 50 | cudaFree(obj); 51 | } 52 | } 53 | }; 54 | 55 | const char* onnxModelFile = "../data/mnist.onnx"; 56 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 57 | 58 | int main() 59 | { 60 | std::unique_ptr builder(createInferBuilder(gLogger)); 61 | builder->setMaxBatchSize(1); 62 | 63 | 
std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 64 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 65 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 66 | 67 | std::unique_ptr config(builder->createBuilderConfig()); 68 | config->setMaxWorkspaceSize(1 << 20); 69 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 70 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 71 | 72 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 73 | std::unique_ptr context(engine->createExecutionContext()); 74 | 75 | vector input_cpu_data, output_cpu_data; 76 | input_cpu_data.resize(28*28); 77 | output_cpu_data.resize(10); 78 | 79 | // Load pgm image 80 | std::vector fileData(28 * 28); 81 | readPGMFile("../data/9.pgm", fileData.data(), 28, 28); 82 | for(int i = 0; i < 28*28; ++i){ 83 | input_cpu_data[i] = 1.0 - (fileData[i]/255.0); 84 | std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % 28) ? "" : "\n"); 85 | } 86 | 87 | 88 | cudaStream_t stream; 89 | cudaStreamCreate(&stream); 90 | 91 | int size_input = sizeof(float)*28*28; 92 | int size_output = sizeof(float)*10; 93 | 94 | void *input_gpu_data_ptr, *output_gpu_data_ptr; 95 | std::unique_ptr input_gpu_data, output_gpu_data; 96 | cudaMalloc(&input_gpu_data_ptr, size_input); 97 | cudaMalloc(&output_gpu_data_ptr, size_output); 98 | input_gpu_data.reset(input_gpu_data_ptr); 99 | output_gpu_data.reset(output_gpu_data_ptr); 100 | 101 | void* buffers[2]; 102 | buffers[0] = input_gpu_data.get(); 103 | buffers[1] = output_gpu_data.get(); 104 | 105 | cudaMemcpyAsync(buffers[0], input_cpu_data.data(), size_input, cudaMemcpyHostToDevice, stream); 106 | 107 | bool is_success = context->executeV2(buffers); 108 | if(is_success) 109 | std::cout << "Forward success !" << std::endl; 110 | else 111 | std::cout << "Forward Error !" 
<< std::endl; 112 | 113 | cudaMemcpyAsync(output_cpu_data.data(), buffers[1], size_output, cudaMemcpyDeviceToHost, stream); 114 | cudaStreamSynchronize(stream); 115 | cudaStreamDestroy(stream); 116 | 117 | // softmax 118 | float sum{0.0f}; 119 | for (int i = 0; i < 10; i++){ 120 | output_cpu_data[i] = exp(output_cpu_data[i]); 121 | sum += output_cpu_data[i]; 122 | } 123 | 124 | // output 125 | for(int i = 0; i < 10; ++i){ 126 | output_cpu_data[i] /= sum; 127 | std::cout << i << ": " << std::string(floor(output_cpu_data[i] * 10 + 0.5f), '*') << "\n"; 128 | } 129 | 130 | return 1; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /cublasTest.cpp: -------------------------------------------------------------------------------- 1 | // CUDA runtime 库 + CUBLAS 库 2 | #include "cuda_runtime.h" 3 | #include "cublas_v2.h" 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | // 定义测试矩阵的维度 10 | int const A_ROW = 1; 11 | int const A_COL = 45; 12 | int const B_ROW = 45; 13 | int const B_COL = 15; 14 | 15 | int main() 16 | { 17 | // 定义状态变量 18 | cublasStatus_t status; 19 | float *h_A,*h_B,*h_C; //存储于内存中的矩阵 20 | h_A = (float*)malloc(sizeof(float)*A_ROW*A_COL); //在内存中开辟空间 21 | h_B = (float*)malloc(sizeof(float)*B_ROW*B_COL); 22 | h_C = (float*)malloc(sizeof(float)*A_ROW*B_COL); 23 | 24 | // 为待运算矩阵的元素赋予 0-10 范围内的随机数 25 | for (int i=0; i 24 | 25 | static void* test_func(void* data) 26 | { 27 | return data; 28 | } 29 | 30 | int main(void) 31 | { 32 | pthread_t thread; 33 | pthread_create(&thread, NULL, test_func, NULL); 34 | pthread_detach(thread); 35 | pthread_cancel(thread); 36 | pthread_join(thread, NULL); 37 | pthread_atfork(NULL, NULL, NULL); 38 | pthread_exit(NULL); 39 | 40 | return 0; 41 | } 42 | 43 | Determining if the function pthread_create exists in the pthreads failed with the following output: 44 | Change Dir: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp 45 | 46 | Run Build 
Command(s):/usr/bin/make -f Makefile cmTC_2b59d/fast && /usr/bin/make -f CMakeFiles/cmTC_2b59d.dir/build.make CMakeFiles/cmTC_2b59d.dir/build 47 | make[1]: 进入目录“/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp” 48 | Building C object CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o 49 | /bin/x86_64-linux-gnu-gcc-7 -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o -c /snap/cmake/888/share/cmake-3.20/Modules/CheckFunctionExists.c 50 | Linking C executable cmTC_2b59d 51 | /snap/cmake/888/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2b59d.dir/link.txt --verbose=1 52 | /bin/x86_64-linux-gnu-gcc-7 -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o -o cmTC_2b59d -lpthreads 53 | /usr/bin/ld: 找不到 -lpthreads 54 | collect2: error: ld returned 1 exit status 55 | make[1]: *** [CMakeFiles/cmTC_2b59d.dir/build.make:99:cmTC_2b59d] 错误 1 56 | make[1]: 离开目录“/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp” 57 | make: *** [Makefile:127:cmTC_2b59d/fast] 错误 2 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/2: -------------------------------------------------------------------------------- 1 | empty -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/5: -------------------------------------------------------------------------------- 1 | empty -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/count.txt: -------------------------------------------------------------------------------- 1 | 16 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | 
/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/rebuild_cache.dir 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/einsumlib.dir 3 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/edit_cache.dir 4 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/rebuild_cache.dir 5 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir 6 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/edit_cache.dir 7 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/CMakeFiles/rebuild_cache.dir 8 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/CMakeFiles/edit_cache.dir 9 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/einsum.dir/Einsum.cpp.o" "gcc" "CMakeFiles/einsum.dir/Einsum.cpp.o.d" 12 | ) 13 | 14 | # Targets to which this target links. 
15 | set(CMAKE_TARGET_LINKED_INFO_FILES 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/common/CMakeFiles/commonlib.dir/DependInfo.cmake" 17 | ) 18 | 19 | # Fortran module output directory. 20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 21 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum.dir/Einsum.cpp.o" 3 | "CMakeFiles/einsum.dir/Einsum.cpp.o.d" 4 | "libeinsum.a" 5 | "libeinsum.pdb" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/einsum.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/Data2/xzyLinuxInstallPackage/5TensorRT/TensorRT-7.2.1.6/include -I/usr/local/cuda/include -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common -isystem /usr/local/opencv4.4.0/include/opencv4 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum.a CMakeFiles/einsum.dir/Einsum.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum.a 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 12 2 | CMAKE_PROGRESS_2 = 13 3 | 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/einsumlib.dir/Einsum.cpp.o" "gcc" "CMakeFiles/einsumlib.dir/Einsum.cpp.o.d" 12 | ) 13 | 14 | # Targets to which this target links. 
15 | set(CMAKE_TARGET_LINKED_INFO_FILES 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake" 17 | ) 18 | 19 | # Fortran module output directory. 20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 21 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsumlib.dir/Einsum.cpp.o" 3 | "CMakeFiles/einsumlib.dir/Einsum.cpp.o.d" 4 | "libeinsumlib.a" 5 | "libeinsumlib.pdb" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/einsumlib.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsumlib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for einsumlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsumlib. 
3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsumlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsumlib.a CMakeFiles/einsumlib.dir/Einsum.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsumlib.a 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 12 2 | CMAKE_PROGRESS_2 = 13 3 | 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 13 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/test.dir/Einsum.cpp.o" "gcc" "CMakeFiles/test.dir/Einsum.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/onnx2trt_gcn.cpp" "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o" "gcc" "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o.d" 13 | ) 14 | 15 | # Targets to which this target links. 16 | set(CMAKE_TARGET_LINKED_INFO_FILES 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake" 18 | ) 19 | 20 | # Fortran module output directory. 21 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 22 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/test.dir/Einsum.cpp.o" 3 | "CMakeFiles/test.dir/Einsum.cpp.o.d" 4 | "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o" 5 | "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o.d" 6 | "test" 7 | "test.pdb" 8 | ) 9 | 10 | # Per-language clean rules from dependency scanning. 11 | foreach(lang CXX) 12 | include(CMakeFiles/test.dir/cmake_clean_${lang}.cmake OPTIONAL) 13 | endforeach() 14 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for test. 2 | # This may be replaced when dependencies are built. 
3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for test. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for test. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 14 2 | CMAKE_PROGRESS_2 = 15 3 | CMAKE_PROGRESS_3 = 16 4 | 5 | -------------------------------------------------------------------------------- /einsum/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set 
the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 
47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/cmake_install.cmake") 48 | endif() 49 | 50 | if(CMAKE_INSTALL_COMPONENT) 51 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 52 | else() 53 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 54 | endif() 55 | 56 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 57 | "${CMAKE_INSTALL_MANIFEST_FILES}") 58 | file(WRITE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/${CMAKE_INSTALL_MANIFEST}" 59 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 60 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/checkMacrosPlugin.cpp" "common/CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/cudaDriverWrapper.cpp" "common/CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/getOptions.cpp" "common/CMakeFiles/commonlib.dir/getOptions.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/logger.cpp" "common/CMakeFiles/commonlib.dir/logger.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/nmsHelper.cpp" "common/CMakeFiles/commonlib.dir/nmsHelper.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/reducedMathPlugin.cpp" "common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleEngines.cpp" "common/CMakeFiles/commonlib.dir/sampleEngines.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleEngines.cpp.o.d" 18 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleInference.cpp" "common/CMakeFiles/commonlib.dir/sampleInference.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleInference.cpp.o.d" 19 | 
"/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleOptions.cpp" "common/CMakeFiles/commonlib.dir/sampleOptions.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleReporting.cpp" "common/CMakeFiles/commonlib.dir/sampleReporting.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/commonlib.dir/getOptions.cpp.o" 7 | "CMakeFiles/commonlib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/commonlib.dir/logger.cpp.o" 9 | "CMakeFiles/commonlib.dir/logger.cpp.o.d" 10 | "CMakeFiles/commonlib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/commonlib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d" 14 | "CMakeFiles/commonlib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/commonlib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/commonlib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/commonlib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/commonlib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/commonlib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/commonlib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/commonlib.dir/sampleReporting.cpp.o.d" 22 | "libcommonlib.a" 23 | "libcommonlib.pdb" 24 | ) 25 | 26 | # Per-language 
clean rules from dependency scanning. 27 | foreach(lang CXX) 28 | include(CMakeFiles/commonlib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libcommonlib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for commonlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for commonlib. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for commonlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libcommonlib.a CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o CMakeFiles/commonlib.dir/getOptions.cpp.o CMakeFiles/commonlib.dir/logger.cpp.o CMakeFiles/commonlib.dir/nmsHelper.cpp.o CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o CMakeFiles/commonlib.dir/sampleEngines.cpp.o CMakeFiles/commonlib.dir/sampleInference.cpp.o CMakeFiles/commonlib.dir/sampleOptions.cpp.o CMakeFiles/commonlib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libcommonlib.a 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | 
-------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/common/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 
47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/common/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/checkMacrosPlugin.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/cudaDriverWrapper.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/getOptions.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/logger.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/logger.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/nmsHelper.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o" "gcc" 
"einsum_common7/CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/reducedMathPlugin.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleEngines.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o.d" 18 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleInference.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o.d" 19 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleOptions.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleReporting.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 
28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o" 7 | "CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/einsum_common7_lib.dir/logger.cpp.o" 9 | "CMakeFiles/einsum_common7_lib.dir/logger.cpp.o.d" 10 | "CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d" 14 | "CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o.d" 22 | "libeinsum_common7_lib.a" 23 | "libeinsum_common7_lib.pdb" 24 | ) 25 | 26 | # Per-language clean rules from dependency scanning. 
27 | foreach(lang CXX) 28 | include(CMakeFiles/einsum_common7_lib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum_common7_lib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum_common7_lib. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum_common7_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/Data2/xzyLinuxInstallPackage/5TensorRT/TensorRT-7.2.1.6/include -I/usr/local/cuda/include -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/./einsum_common7 -isystem /usr/local/opencv4.4.0/include/opencv4 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum_common7_lib.a CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o CMakeFiles/einsum_common7_lib.dir/logger.cpp.o CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum_common7_lib.a 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- 
/einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common7/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/checkMacrosPlugin.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/cudaDriverWrapper.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/getOptions.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/logger.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/logger.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/nmsHelper.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/reducedMathPlugin.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleEngines.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o.d" 18 
| "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleInference.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o.d" 19 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleOptions.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleReporting.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o" 7 | "CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/einsum_common8_lib.dir/logger.cpp.o" 9 | "CMakeFiles/einsum_common8_lib.dir/logger.cpp.o.d" 10 | "CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d" 14 | 
"CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o.d" 22 | "libeinsum_common8_lib.a" 23 | "libeinsum_common8_lib.pdb" 24 | ) 25 | 26 | # Per-language clean rules from dependency scanning. 27 | foreach(lang CXX) 28 | include(CMakeFiles/einsum_common8_lib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum_common8_lib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for einsum_common8_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum_common8_lib. 
3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum_common8_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum_common8_lib.a CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o CMakeFiles/einsum_common8_lib.dir/logger.cpp.o CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum_common8_lib.a 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/progress.make: 
-------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 
9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/einsum/build/test -------------------------------------------------------------------------------- /einsum/einsum_common7/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kernels) 2 | AUX_SOURCE_DIRECTORY(./ DIR_LIB_SRCS) 3 | #SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) 4 | add_library(einsum_common_lib ${DIR_LIB_SRCS}) 5 | -------------------------------------------------------------------------------- /einsum/einsum_common7/EntropyCalibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef ENTROPY_CALIBRATOR_H 18 | #define ENTROPY_CALIBRATOR_H 19 | 20 | #include "BatchStream.h" 21 | #include "NvInfer.h" 22 | 23 | //! \class EntropyCalibratorImpl 24 | //! 25 | //! \brief Implements common functionality for Entropy calibrators. 26 | //! 27 | template 28 | class EntropyCalibratorImpl 29 | { 30 | public: 31 | EntropyCalibratorImpl( 32 | TBatchStream stream, int firstBatch, std::string networkName, const char* inputBlobName, bool readCache = true) 33 | : mStream{stream} 34 | , mCalibrationTableName("CalibrationTable" + networkName) 35 | , mInputBlobName(inputBlobName) 36 | , mReadCache(readCache) 37 | { 38 | nvinfer1::Dims dims = mStream.getDims(); 39 | mInputCount = samplesCommon::volume(dims); 40 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 41 | mStream.reset(firstBatch); 42 | } 43 | 44 | virtual ~EntropyCalibratorImpl() 45 | { 46 | CHECK(cudaFree(mDeviceInput)); 47 | } 48 | 49 | int getBatchSize() const 50 | { 51 | return mStream.getBatchSize(); 52 | } 53 | 54 | bool getBatch(void* bindings[], const char* names[], int nbBindings) 55 | { 56 | if (!mStream.next()) 57 | { 58 | return false; 59 | } 60 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 61 | assert(!strcmp(names[0], mInputBlobName)); 62 | bindings[0] = mDeviceInput; 63 | return true; 64 | } 65 | 66 | const void* readCalibrationCache(size_t& length) 67 | { 68 | mCalibrationCache.clear(); 69 | std::ifstream input(mCalibrationTableName, std::ios::binary); 70 | input >> std::noskipws; 71 | if (mReadCache && input.good()) 72 | { 73 | std::copy(std::istream_iterator(input), std::istream_iterator(), 74 | std::back_inserter(mCalibrationCache)); 75 | } 76 | length = mCalibrationCache.size(); 77 | return length ? 
mCalibrationCache.data() : nullptr; 78 | } 79 | 80 | void writeCalibrationCache(const void* cache, size_t length) 81 | { 82 | std::ofstream output(mCalibrationTableName, std::ios::binary); 83 | output.write(reinterpret_cast(cache), length); 84 | } 85 | 86 | private: 87 | TBatchStream mStream; 88 | size_t mInputCount; 89 | std::string mCalibrationTableName; 90 | const char* mInputBlobName; 91 | bool mReadCache{true}; 92 | void* mDeviceInput{nullptr}; 93 | std::vector mCalibrationCache; 94 | }; 95 | 96 | //! \class Int8EntropyCalibrator2 97 | //! 98 | //! \brief Implements Entropy calibrator 2. 99 | //! CalibrationAlgoType is kENTROPY_CALIBRATION_2. 100 | //! 101 | template 102 | class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 103 | { 104 | public: 105 | Int8EntropyCalibrator2( 106 | TBatchStream stream, int firstBatch, const char* networkName, const char* inputBlobName, bool readCache = true) 107 | : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) 108 | { 109 | } 110 | 111 | int getBatchSize() const override 112 | { 113 | return mImpl.getBatchSize(); 114 | } 115 | 116 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 117 | { 118 | return mImpl.getBatch(bindings, names, nbBindings); 119 | } 120 | 121 | const void* readCalibrationCache(size_t& length) override 122 | { 123 | return mImpl.readCalibrationCache(length); 124 | } 125 | 126 | void writeCalibrationCache(const void* cache, size_t length) override 127 | { 128 | mImpl.writeCalibrationCache(cache, length); 129 | } 130 | 131 | private: 132 | EntropyCalibratorImpl mImpl; 133 | }; 134 | 135 | #endif // ENTROPY_CALIBRATOR_H 136 | -------------------------------------------------------------------------------- /einsum/einsum_common7/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_BBOX_UTILS_H 18 | #define TRT_BBOX_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | template 26 | struct Bbox 27 | { 28 | T xmin, ymin, xmax, ymax; 29 | Bbox(T xmin, T ymin, T xmax, T ymax) 30 | : xmin(xmin) 31 | , ymin(ymin) 32 | , xmax(xmax) 33 | , ymax(ymax) 34 | { 35 | } 36 | Bbox() = default; 37 | }; 38 | 39 | template 40 | struct BboxInfo 41 | { 42 | T conf_score; 43 | int label; 44 | int bbox_idx; 45 | bool kept; 46 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 47 | : conf_score(conf_score) 48 | , label(label) 49 | , bbox_idx(bbox_idx) 50 | , kept(kept) 51 | { 52 | } 53 | BboxInfo() = default; 54 | }; 55 | 56 | template 57 | bool operator<(const Bbox& lhs, const Bbox& rhs) 58 | { 59 | return lhs.x1 < rhs.x1; 60 | } 61 | 62 | template 63 | bool operator==(const Bbox& lhs, const Bbox& rhs) 64 | { 65 | return lhs.x1 == rhs.x1 && lhs.y1 == rhs.y1 && lhs.x2 == rhs.x2 && lhs.y2 == rhs.y2; 66 | } 67 | // }}} 68 | 69 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 70 | 71 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 72 | 73 | size_t dataTypeSize(DataType dtype); 74 | 75 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 76 | 77 | #endif 78 | 
-------------------------------------------------------------------------------- /einsum/einsum_common7/checkMacrosPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "checkMacrosPlugin.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvinfer1 23 | { 24 | namespace plugin 25 | { 26 | 27 | // This will be populated by the logger supplied by the user to initLibNvInferPlugins() 28 | ILogger* gLogger{}; 29 | 30 | template 31 | int LogStream::Buf::sync() 32 | { 33 | std::string s = str(); 34 | while (!s.empty() && s.back() == '\n') 35 | { 36 | s.pop_back(); 37 | } 38 | if (gLogger != nullptr) 39 | { 40 | gLogger->log(kSeverity, s.c_str()); 41 | } 42 | str(""); 43 | return 0; 44 | } 45 | 46 | // These use gLogger, and therefore require initLibNvInferPlugins() to be called with a logger 47 | // (otherwise, it will not log) 48 | LogStream gLogError; 49 | LogStream gLogWarning; 50 | LogStream gLogInfo; 51 | LogStream gLogVerbose; 52 | 53 | // break-pointable 54 | void throwCudaError(const char* file, const char* function, int line, int status, const char* msg) 55 | { 56 | CudaError error(file, function, line, status, msg); 57 | error.log(gLogError); 58 | throw error; 59 | } 60 | 61 | // break-pointable 62 | void throwCublasError(const 
char* file, const char* function, int line, int status, const char* msg) 63 | { 64 | if (msg == nullptr) 65 | { 66 | auto s_ = static_cast(status); 67 | switch (s_) 68 | { 69 | case CUBLAS_STATUS_SUCCESS: msg = "CUBLAS_STATUS_SUCCESS"; break; 70 | case CUBLAS_STATUS_NOT_INITIALIZED: msg = "CUBLAS_STATUS_NOT_INITIALIZED"; break; 71 | case CUBLAS_STATUS_ALLOC_FAILED: msg = "CUBLAS_STATUS_ALLOC_FAILED"; break; 72 | case CUBLAS_STATUS_INVALID_VALUE: msg = "CUBLAS_STATUS_INVALID_VALUE"; break; 73 | case CUBLAS_STATUS_ARCH_MISMATCH: msg = "CUBLAS_STATUS_ARCH_MISMATCH"; break; 74 | case CUBLAS_STATUS_MAPPING_ERROR: msg = "CUBLAS_STATUS_MAPPING_ERROR"; break; 75 | case CUBLAS_STATUS_EXECUTION_FAILED: msg = "CUBLAS_STATUS_EXECUTION_FAILED"; break; 76 | case CUBLAS_STATUS_INTERNAL_ERROR: msg = "CUBLAS_STATUS_INTERNAL_ERROR"; break; 77 | case CUBLAS_STATUS_NOT_SUPPORTED: msg = "CUBLAS_STATUS_NOT_SUPPORTED"; break; 78 | case CUBLAS_STATUS_LICENSE_ERROR: msg = "CUBLAS_STATUS_LICENSE_ERROR"; break; 79 | } 80 | } 81 | CublasError error(file, function, line, status, msg); 82 | error.log(gLogError); 83 | throw error; 84 | } 85 | 86 | // break-pointable 87 | void throwCudnnError(const char* file, const char* function, int line, int status, const char* msg) 88 | { 89 | CudnnError error(file, function, line, status, msg); 90 | error.log(gLogError); 91 | throw error; 92 | } 93 | 94 | void logError(const char* msg, const char* file, const char* fn, int line) 95 | { 96 | gLogError << "Parameter check failed at: " << file << "::" << fn << "::" << line; 97 | gLogError << ", condition: " << msg << std::endl; 98 | } 99 | 100 | // break-pointable 101 | void reportAssertion(const char* msg, const char* file, int line) 102 | { 103 | std::ostringstream stream; 104 | stream << "Assertion failed: " << msg << std::endl 105 | << file << ':' << line << std::endl 106 | << "Aborting..." 
<< std::endl; 107 | getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); 108 | cudaDeviceReset(); 109 | abort(); 110 | } 111 | 112 | void TRTException::log(std::ostream& logStream) const 113 | { 114 | logStream << file << " (" << line << ") - " << name << " Error in " << function << ": " << status; 115 | if (message != nullptr) 116 | { 117 | logStream << " (" << message << ")"; 118 | } 119 | logStream << std::endl; 120 | } 121 | 122 | } // namespace plugin 123 | 124 | } // namespace nvinfer1 125 | -------------------------------------------------------------------------------- /einsum/einsum_common7/cub_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | template 19 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments) 20 | { 21 | size_t temp_storage_bytes = 0; 22 | cub::DeviceSegmentedRadixSort::SortPairsDescending((void*) NULL, temp_storage_bytes, (const KeyT*) NULL, 23 | (KeyT*) NULL, (const ValueT*) NULL, (ValueT*) NULL, 24 | num_items, // # items 25 | num_segments, // # segments 26 | (const int*) NULL, (const int*) NULL); 27 | return temp_storage_bytes; 28 | } 29 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | file(GLOB SRCS *.cpp) 17 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS}) 18 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE) 19 | file(GLOB CU_SRCS *.cu) 20 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS}) 21 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE) 22 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/extractFgScores.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "kernel.h" 17 | 18 | template 19 | pluginStatus_t extractFgScores_gpu(cudaStream_t stream, 20 | int N, 21 | int A, 22 | int H, 23 | int W, 24 | const void* scores, 25 | void* fgScores) 26 | { 27 | // Copy all the objectness scores for one batch 28 | size_t size = A * H * W * sizeof(T); 29 | for (int n = 0; n < N; n++) 30 | { 31 | // Find out the starting pointer of the objectness scores in the input 32 | size_t offset_ld = (n * 2 + 1) * A * H * W; 33 | // Find out the starting pointer of the objectness scores in the output 34 | size_t offset_st = n * A * H * W; 35 | CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); 36 | } 37 | 38 | return STATUS_SUCCESS; 39 | } 40 | 41 | template 42 | pluginStatus_t extractFgScores_cpu(int N, 43 | int A, 44 | int H, 45 | int W, 46 | const void* scores, 47 | void* fgScores) 48 | { 49 | size_t size = A * H * W * sizeof(T); 50 | for (int n = 0; n < N; n++) 51 | { 52 | size_t offset_ld = (n * 2 + 1) * A * H * W; 53 | size_t offset_st = n * A * H * W; 54 | memcpy(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size); 55 | } 56 | return STATUS_SUCCESS; 57 | } 58 | 59 | pluginStatus_t extractFgScores(cudaStream_t stream, 60 | const int N, 61 | const int A, 62 | const int H, 63 | const int W, 64 | const DataType t_scores, 65 | const DLayout_t 
l_scores, 66 | const void* scores, 67 | const DataType t_fgScores, 68 | const DLayout_t l_fgScores, 69 | void* fgScores) 70 | { 71 | if (l_fgScores != NCHW || l_scores != NCHW) 72 | return STATUS_BAD_PARAM; 73 | 74 | if (t_fgScores != DataType::kFLOAT) 75 | return STATUS_BAD_PARAM; 76 | 77 | if (t_scores != DataType::kFLOAT) 78 | return STATUS_BAD_PARAM; 79 | 80 | return extractFgScores_gpu(stream, N, A, H, W, scores, fgScores); 81 | } 82 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/generateAnchors.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "kernel.h" 17 | #include 18 | 19 | pluginStatus_t generateAnchors_cpu(int numRatios, 20 | float* ratios, 21 | int numScales, 22 | float* scales, 23 | int baseSize, 24 | float* anchors) 25 | { 26 | #ifdef DEBUG 27 | DEBUG_PRINTF("Generating Anchors with:\n"); 28 | DEBUG_PRINTF("Scales:"); 29 | for (int s = 0; s < numScales; ++s) 30 | { 31 | DEBUG_PRINTF("%f\t", scales[s]); 32 | } 33 | DEBUG_PRINTF("\n"); 34 | DEBUG_PRINTF("Ratios:"); 35 | for (int r = 0; r < numRatios; ++r) 36 | { 37 | DEBUG_PRINTF("%f\t", ratios[r]); 38 | } 39 | DEBUG_PRINTF("\n"); 40 | #endif 41 | 42 | if ((numScales <= 0) || (numRatios <= 0) || (baseSize <= 0)) 43 | { 44 | return STATUS_BAD_PARAM; 45 | } 46 | 47 | // Generate parameters for numRatios * numScales general anchor boxes 48 | for (int r = 0; r < numRatios; ++r) 49 | { 50 | for (int s = 0; s < numScales; ++s) 51 | { 52 | int id = r * numScales + s; 53 | float scale = scales[s]; 54 | float ratio = ratios[r]; 55 | float bs = baseSize; 56 | float ws = round(sqrt((float) (bs * bs) / ratio)); 57 | float hs = round(ws * ratio); 58 | // Width: bs / sqrt(ratio) * scale 59 | // Height: bs * sqrt(ratio) * scale 60 | ws *= scale; 61 | hs *= scale; 62 | 63 | // x_anchor_ctr 64 | /* 65 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 66 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 67 | */ 68 | anchors[id * 4] = (bs - 1) / 2; 69 | // y_anchor_ctr 70 | /* 71 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 72 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 
73 | */ 74 | anchors[id * 4 + 1] = (bs - 1) / 2; 75 | // w_anchor 76 | anchors[id * 4 + 2] = ws; 77 | // h_anchor 78 | anchors[id * 4 + 3] = hs; 79 | } 80 | } 81 | return STATUS_SUCCESS; 82 | } 83 | 84 | pluginStatus_t generateAnchors(cudaStream_t stream, 85 | int numRatios, 86 | float* ratios, 87 | int numScales, 88 | float* scales, 89 | int baseSize, 90 | float* anchors) 91 | { 92 | // Each anchor box has 4 parameters 93 | int ac = numRatios * numScales * 4; 94 | float* anchors_cpu; 95 | cudaMallocHost((void**) &anchors_cpu, sizeof(float) * ac); 96 | pluginStatus_t status = generateAnchors_cpu(numRatios, ratios, numScales, scales, baseSize, anchors_cpu); 97 | cudaMemcpyAsync(anchors, anchors_cpu, sizeof(float) * ac, cudaMemcpyHostToDevice, stream); 98 | cudaFreeHost(anchors_cpu); 99 | return status; 100 | } 101 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | #include "plugin.h" 19 | 20 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 21 | int topK, DataType DT_BBOX, DataType DT_SCORE) 22 | { 23 | size_t wss[7]; 24 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 25 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 26 | wss[2] = detectionForwardPreNMSSize(N, C2); 27 | wss[3] = detectionForwardPreNMSSize(N, C2); 28 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 29 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 30 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 31 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 32 | return calculateTotalWorkspaceSize(wss, 7); 33 | } 34 | 35 | namespace nvinfer1 36 | { 37 | namespace plugin 38 | { 39 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 40 | int topK, DataType DT_BBOX, DataType DT_SCORE) 41 | { 42 | size_t wss[7]; 43 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 44 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 45 | wss[2] = detectionForwardPreNMSSize(N, C2); 46 | wss[3] = detectionForwardPreNMSSize(N, C2); 47 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 48 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 49 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 50 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 51 | return calculateTotalWorkspaceSize(wss, 7); 52 | } 53 | } // namespace plugin 54 | } // namespace nvinfer1 55 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/lReLU.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "kernel.h" 18 | 19 | template 20 | __launch_bounds__(nthdsPerCTA) __global__ 21 | void pReLUKernel(const int n, const float negativeSlope, const float* input, float* output) 22 | { 23 | for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA) 24 | { 25 | output[i] = input[i] > 0 ? input[i] : input[i] * negativeSlope; 26 | } 27 | } 28 | 29 | pluginStatus_t lReLUGPU(cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 30 | { 31 | const int BS = 512; 32 | const int GS = (n + BS - 1) / BS; 33 | pReLUKernel<<>>(n, negativeSlope, 34 | (const float*) input, 35 | (float*) output); 36 | return STATUS_SUCCESS; 37 | } 38 | 39 | pluginStatus_t lReLUInference( 40 | cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 41 | { 42 | return lReLUGPU(stream, n, negativeSlope, (const float*) input, (float*) output); 43 | } 44 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/reducedMathPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _REDUCED_MATH_PLUGIN_H
#define _REDUCED_MATH_PLUGIN_H
// Dynamically strength-reduced div and mod
//
// Ideas taken from Sean Baxter's MGPU library.
// These classes provide for reduced complexity division and modulus
// on integers, for the case where the same divisor or modulus will
// be used repeatedly.

namespace nvinfer1
{
namespace plugin
{
namespace detail
{

// Precomputes (mul_coeff, shift_coeff) so that x / denom can later be
// evaluated as (umulhi(x, mul_coeff) >> shift_coeff). Defined in
// reducedMathPlugin.cpp.
void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff);

// Upper 32 bits of the 64-bit product x * y. Uses the hardware __umulhi
// intrinsic in device code, and a 64-bit multiply on the host.
// NOTE(review): the arch guard ">= 100" admits every compute capability;
// confirm whether a higher floor was intended.
__host__ __device__ __forceinline__ unsigned int umulhi(unsigned int x, unsigned int y)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 100
    return __umulhi(x, y);
#else
    unsigned long long z = (unsigned long long) x * (unsigned long long) y;
    return (unsigned int) (z >> 32);
#endif
}

// This is a weird implementation that returns div_up(0,1)=0 but
// div_up(0,2)=1 (wrong) -- just do not use it with a=0.
__host__ __device__ inline int div_up(int a, int b)
{
    return (a - 1) / b + 1;
}

} // end namespace detail

// Wraps a fixed divisor y together with precomputed coefficients so that
// div/mod by y reduce to a multiply-high and a shift (fast on GPU).
class reduced_divisor
{
public:
    reduced_divisor() {}
    // Host-only: runs find_divisor to derive the coefficients for _y.
    __host__ __forceinline__ reduced_divisor(int _y)
        : y(_y)
    {
        detail::find_divisor(y, mul_coeff, shift_coeff);
    }
    // Rehydrates from previously computed coefficients (usable in device code).
    __host__ __device__ __forceinline__ reduced_divisor(unsigned _mul_coeff, unsigned _shift_coeff, int _y)
        : mul_coeff(_mul_coeff)
        , shift_coeff(_shift_coeff)
        , y(_y)
    {
    }
    __host__ __device__ __forceinline__ int div(int x) const
    {
        // if dividing by 1, then find_divisor wouldn't have worked because
        // mul_coeff would have had to be 2^32, which can't be represented,
        // so we have to special case that one.
        return (y != 1) ? detail::umulhi((unsigned int) x, mul_coeff) >> shift_coeff : x;
    }
    __host__ __device__ __forceinline__ int mod(int x) const
    {
        return x - (div(x) * y);
    }
    // Computes quotient and remainder in one pass.
    __host__ __device__ __forceinline__ void divmod(int x, int& q, int& mod) const
    {
        q = div(x);
        mod = x - (q * y);
    }
    // Returns the original divisor.
    __host__ __device__ __forceinline__ int get() const
    {
        return y;
    }
    inline __host__ void get_mul_shift(unsigned& mul, unsigned& shift)
    {
        mul = mul_coeff;
        shift = shift_coeff;
    }

protected:
    unsigned int mul_coeff;
    unsigned int shift_coeff;
    int y;
};

} // namespace plugin

} // namespace nvinfer1
#endif /*_REDUCED_MATH_PLUGIN_H*/
// ---------------------------------------------------------------------------
// einsum/einsum_common7/kernels/reorgForward.cu
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "reducedMathPlugin.h" 17 | #include "kernel.h" 18 | 19 | using namespace nvinfer1::plugin; // for reduced_divisor 20 | 21 | template 22 | __launch_bounds__(nthdsPerCTA) 23 | __global__ void reorgKernel( 24 | const float* input, // input tensor of shape (batch, C, H, W) 25 | const int volume, // note that volumes of input and output tensors are the same 26 | reduced_divisor batch, 27 | reduced_divisor C, 28 | reduced_divisor H, 29 | reduced_divisor W, 30 | reduced_divisor C_out, 31 | reduced_divisor stride, 32 | float* output) // output tensor of shape (batch, C * stride * stride, H / stride, W / stride) 33 | { 34 | /* 35 | * Reference 36 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas_kernels.cu#L370 37 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas.c#L9 38 | */ 39 | 40 | // outIndex is row-major position of input coordinates 41 | for (int outIndex = blockIdx.x * nthdsPerCTA + threadIdx.x; outIndex < volume; outIndex += nthdsPerCTA) 42 | { 43 | int i = outIndex; 44 | 45 | // calculate output coordinates from outIndex 46 | int outW, outH, outC; 47 | W.divmod(i, i, outW); 48 | H.divmod(i, i, outH); 49 | C.divmod(i, i, outC); 50 | int outN = i; 51 | 52 | // calculate input coordinates based on output coordinates 53 | // offset is [0, 1, ..., stride * stride - 1] = posH 
* stride + posW 54 | int offset, inC, posH, posW; 55 | C_out.divmod(outC, offset, inC); 56 | stride.divmod(offset, posH, posW); 57 | int inH = outH * stride.get() + posH; 58 | int inW = outW * stride.get() + posW; 59 | int inN = outN; 60 | 61 | // inIndex is row-major position of input coordinates 62 | int inIndex = inW + W.get() * stride.get() * (inH + H.get() * stride.get() * (inC + C_out.get() * inN)); 63 | 64 | output[outIndex] = input[inIndex]; 65 | } 66 | } 67 | 68 | pluginStatus_t reorgGPU( 69 | cudaStream_t stream, 70 | const int batch, 71 | const int C, 72 | const int H, 73 | const int W, 74 | const int stride, 75 | const float* input, 76 | float* output) 77 | { 78 | const int BS = 512; // number of threads in one block 79 | const int volume = batch * C * H * W; // size of input tensor 80 | const int GS = (volume + BS - 1) / BS; // number of blocks to launch, calculated so global number of threads is >= volume 81 | 82 | reduced_divisor C_out(C / (stride * stride)); 83 | reorgKernel<<>>(input, volume, reduced_divisor(batch), reduced_divisor(C), reduced_divisor(H), reduced_divisor(W), C_out, reduced_divisor(stride), output); 84 | return STATUS_SUCCESS; 85 | } 86 | 87 | pluginStatus_t reorgInference( 88 | cudaStream_t stream, 89 | const int batch, 90 | const int C, 91 | const int H, 92 | const int W, 93 | const int stride, 94 | const void* input, 95 | void* output) 96 | { 97 | return reorgGPU(stream, batch, C, H, W, stride, (const float*) input, (float*) output); 98 | } 99 | -------------------------------------------------------------------------------- /einsum/einsum_common7/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "logger.h" 18 | #include "logging.h" 19 | 20 | namespace sample 21 | { 22 | Logger gLogger{Logger::Severity::kINFO}; 23 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 24 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 25 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 26 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 27 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 28 | 29 | void setReportableSeverity(Logger::Severity severity) 30 | { 31 | gLogger.setReportableSeverity(severity); 32 | gLogVerbose.setReportableSeverity(severity); 33 | gLogInfo.setReportableSeverity(severity); 34 | gLogWarning.setReportableSeverity(severity); 35 | gLogError.setReportableSeverity(severity); 36 | gLogFatal.setReportableSeverity(severity); 37 | } 38 | } // namespace sample 39 | -------------------------------------------------------------------------------- /einsum/einsum_common7/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef LOGGER_H 18 | #define LOGGER_H 19 | 20 | #include "logging.h" 21 | 22 | namespace sample 23 | { 24 | extern Logger gLogger; 25 | extern LogStreamConsumer gLogVerbose; 26 | extern LogStreamConsumer gLogInfo; 27 | extern LogStreamConsumer gLogWarning; 28 | extern LogStreamConsumer gLogError; 29 | extern LogStreamConsumer gLogFatal; 30 | 31 | void setReportableSeverity(Logger::Severity severity); 32 | } // namespace sample 33 | 34 | #endif // LOGGER_H 35 | -------------------------------------------------------------------------------- /einsum/einsum_common7/nmsHelper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "plugin.h" 18 | #include "cuda_fp16.h" 19 | #include 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX) 25 | { 26 | if (DT_BBOX == DataType::kFLOAT) 27 | { 28 | return N * C1 * sizeof(float); 29 | } 30 | if (DT_BBOX == DataType::kHALF) 31 | { 32 | return N * C1 * sizeof(__half); 33 | } 34 | 35 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 36 | return (size_t) -1; 37 | } 38 | 39 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX) 40 | { 41 | if (DT_BBOX == DataType::kFLOAT) 42 | { 43 | return shareLocation ? 0 : N * C1 * sizeof(float); 44 | } 45 | if (DT_BBOX == DataType::kHALF) 46 | { 47 | return shareLocation ? 0 : N * C1 * sizeof(__half); 48 | } 49 | 50 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 51 | return (size_t) -1; 52 | } 53 | 54 | size_t detectionForwardPreNMSSize(int N, int C2) 55 | { 56 | ASSERT(sizeof(float) == sizeof(int)); 57 | return N * C2 * sizeof(float); 58 | } 59 | 60 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK) 61 | { 62 | ASSERT(sizeof(float) == sizeof(int)); 63 | return N * numClasses * topK * sizeof(float); 64 | } 65 | -------------------------------------------------------------------------------- /einsum/einsum_common7/nmsUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_NMS_UTILS_H 18 | #define TRT_NMS_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 26 | int topK, DataType DT_BBOX, DataType DT_SCORE); 27 | #endif 28 | -------------------------------------------------------------------------------- /einsum/einsum_common7/parserOnnxConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef PARSER_ONNX_CONFIG_H 18 | #define PARSER_ONNX_CONFIG_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "NvInfer.h" 25 | #include "NvOnnxConfig.h" 26 | #include "NvOnnxParser.h" 27 | 28 | #define ONNX_DEBUG 1 29 | 30 | /** 31 | * \class ParserOnnxConfig 32 | * \brief Configuration Manager Class Concrete Implementation 33 | * 34 | * \note: 35 | * 36 | */ 37 | 38 | using namespace std; 39 | 40 | class ParserOnnxConfig : public nvonnxparser::IOnnxConfig 41 | { 42 | 43 | protected: 44 | string mModelFilename{}; 45 | string mTextFilename{}; 46 | string mFullTextFilename{}; 47 | nvinfer1::DataType mModelDtype; 48 | nvonnxparser::IOnnxConfig::Verbosity mVerbosity; 49 | bool mPrintLayercInfo; 50 | 51 | public: 52 | ParserOnnxConfig() 53 | : mModelDtype(nvinfer1::DataType::kFLOAT) 54 | , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) 55 | , mPrintLayercInfo(false) 56 | { 57 | #ifdef ONNX_DEBUG 58 | if (isDebug()) 59 | { 60 | std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; 61 | } 62 | #endif 63 | } 64 | 65 | protected: 66 | ~ParserOnnxConfig() 67 | { 68 | #ifdef ONNX_DEBUG 69 | if (isDebug()) 70 | { 71 | std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; 72 | } 73 | #endif 74 | } 75 | 76 | public: 77 | virtual void setModelDtype(const nvinfer1::DataType modelDtype) 78 | { 79 | mModelDtype = modelDtype; 80 | } 81 | 82 | virtual nvinfer1::DataType getModelDtype() const 83 | { 84 | return mModelDtype; 85 | } 86 | 87 | virtual const char* getModelFileName() const 88 | { 89 | return mModelFilename.c_str(); 90 | } 91 | virtual void setModelFileName(const char* onnxFilename) 92 | { 93 | mModelFilename = string(onnxFilename); 94 | } 95 | virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const 96 | { 97 | return mVerbosity; 98 | } 99 | virtual void addVerbosity() 100 | { 101 | ++mVerbosity; 102 | } 103 | virtual void reduceVerbosity() 104 | { 105 | --mVerbosity; 106 | } 107 
| virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) 108 | { 109 | mVerbosity = verbosity; 110 | } 111 | 112 | virtual const char* getTextFileName() const 113 | { 114 | return mTextFilename.c_str(); 115 | } 116 | virtual void setTextFileName(const char* textFilename) 117 | { 118 | mTextFilename = string(textFilename); 119 | } 120 | virtual const char* getFullTextFileName() const 121 | { 122 | return mFullTextFilename.c_str(); 123 | } 124 | virtual void setFullTextFileName(const char* fullTextFilename) 125 | { 126 | mFullTextFilename = string(fullTextFilename); 127 | } 128 | virtual bool getPrintLayerInfo() const 129 | { 130 | return mPrintLayercInfo; 131 | } 132 | virtual void setPrintLayerInfo(bool src) 133 | { 134 | mPrintLayercInfo = src; 135 | } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() 136 | 137 | virtual bool isDebug() const 138 | { 139 | #if ONNX_DEBUG 140 | return (std::getenv("ONNX_DEBUG") ? true : false); 141 | #else 142 | return false; 143 | #endif 144 | } 145 | 146 | virtual void destroy() 147 | { 148 | delete this; 149 | } 150 | 151 | }; // class ParserOnnxConfig 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /einsum/einsum_common7/pluginLogger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef PLUGIN_LOGGER_H 18 | #define PLUGIN_LOGGER_H 19 | 20 | #include "pluginLogging.h" 21 | 22 | namespace 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | } // namespace 31 | 32 | #endif // PLUGIN_LOGGER_H 33 | -------------------------------------------------------------------------------- /einsum/einsum_common7/reducedMathPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | namespace nvinfer1 18 | { 19 | namespace plugin 20 | { 21 | namespace detail 22 | { 23 | 24 | // Count leading zeros - start from most significant bit. 
25 | int clz(int x) 26 | { 27 | for (int i = 31; i >= 0; --i) 28 | { 29 | if ((1 << i) & x) 30 | { 31 | return 31 - i; 32 | } 33 | } 34 | return 32; 35 | } 36 | 37 | #define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) 38 | 39 | int find_log_2(int x, bool round_up = false) 40 | { 41 | int a = 31 - clz(x); 42 | if (round_up) 43 | { 44 | a += !CUDNN_IS_POW_2(x); 45 | } 46 | return a; 47 | } 48 | 49 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff) 50 | { 51 | if (denom == 0) 52 | { 53 | return; 54 | } 55 | if (denom == 1) 56 | { 57 | // if dividing by 1, reduced math doesn't work because mul_coeff would 58 | // need to be 2^32, which doesn't fit into unsigned int. the div() 59 | // routine handles this special case separately. 60 | mul_coeff = 0; 61 | shift_coeff = 0; 62 | return; 63 | } 64 | // To express the division N/D in terms of a multiplication, what we first 65 | // imagine is simply N*(1/D). However, 1/D will always evaluate to 0 (for D>1), 66 | // so we need another way. There's nothing that says we have to use exactly 67 | // the fraction 1/D; instead it could be any X/Y that reduces to 1/D (i.e., 68 | // Y=X*D), or at least to "close enough" to it. If we pick Y that is a power 69 | // of two, then the N*(X/Y) can be N*X followed by a right-shift by some amount. 70 | // The power of two we should pick should be at least 2^32, because in the 71 | // div() routine we'll use umulhi(), which returns only the upper 32 bits -- 72 | // this being equivalent to a right-shift by 32. But we might want a higher 73 | // power of two for better accuracy depending on the magnitude of the denominator. 74 | // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, 75 | // and we save shift_coeff as whatever further shift we have to do beyond 76 | // what the umulhi() implies. 
77 | unsigned int p = 31 + find_log_2(denom, true); 78 | unsigned int m = ((1ull << p) + (unsigned int) denom - 1) / (unsigned int) denom; 79 | mul_coeff = m; 80 | shift_coeff = p - 32; 81 | } 82 | 83 | } // namespace detail 84 | 85 | } // namespace plugin 86 | 87 | } // namespace nvinfer1 88 | -------------------------------------------------------------------------------- /einsum/einsum_common7/sampleEngines.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_ENGINES_H 18 | #define TRT_SAMPLE_ENGINES_H 19 | 20 | #include 21 | 22 | #include "NvCaffeParser.h" 23 | #include "NvInfer.h" 24 | #include "NvOnnxParser.h" 25 | #include "NvUffParser.h" 26 | 27 | #include "sampleOptions.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct Parser 34 | { 35 | TrtUniquePtr caffeParser; 36 | TrtUniquePtr uffParser; 37 | TrtUniquePtr onnxParser; 38 | 39 | operator bool() const 40 | { 41 | return caffeParser || uffParser || onnxParser; 42 | } 43 | }; 44 | 45 | //! 46 | //! \brief Generate a network definition for a given model 47 | //! 48 | //! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid 49 | //! 
parser (the returned parser converts to false if tested) 50 | //! 51 | //! \see Parser::operator bool() 52 | //! 53 | Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err); 54 | 55 | //! 56 | //! \brief Create an engine for a network defintion 57 | //! 58 | //! \return Pointer to the engine created or nullptr if the creation failed 59 | //! 60 | nvinfer1::ICudaEngine* networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, 61 | nvinfer1::INetworkDefinition& network, std::ostream& err); 62 | 63 | //! 64 | //! \brief Create an engine for a given model 65 | //! 66 | //! \return Pointer to the engine created or nullptr if the creation failed 67 | //! 68 | nvinfer1::ICudaEngine* modelToEngine( 69 | const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); 70 | 71 | //! 72 | //! \brief Log refittable layers and weights of a refittable engine 73 | //! 74 | void dumpRefittable(nvinfer1::ICudaEngine& engine); 75 | 76 | //! 77 | //! \brief Load a serialized engine 78 | //! 79 | //! \return Pointer to the engine loaded or nullptr if the operation failed 80 | //! 81 | nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err); 82 | 83 | //! 84 | //! \brief Save an engine into a file 85 | //! 86 | //! \return boolean Return true if the engine was successfully saved 87 | //! 88 | bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err); 89 | 90 | //! 91 | //! \brief Create an engine from model or serialized file, and optionally save engine 92 | //! 93 | //! \return Pointer to the engine created or nullptr if the creation failed 94 | //! 
95 | TrtUniquePtr getEngine( 96 | const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); 97 | 98 | } // namespace sample 99 | 100 | #endif // TRT_SAMPLE_ENGINES_H 101 | -------------------------------------------------------------------------------- /einsum/einsum_common7/sampleInference.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_INFERENCE_H 18 | #define TRT_SAMPLE_INFERENCE_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "NvInfer.h" 26 | 27 | #include "sampleReporting.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct InferenceEnvironment 34 | { 35 | TrtUniquePtr engine; 36 | std::unique_ptr profiler; 37 | std::vector> context; 38 | std::vector> bindings; 39 | }; 40 | 41 | //! 42 | //! \brief Set up contexts and bindings for inference 43 | //! 44 | bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); 45 | 46 | //! 47 | //! \brief Run inference and collect timing 48 | //! 
49 | void runInference( 50 | const InferenceOptions& inference, InferenceEnvironment& iEnv, int device, std::vector& trace); 51 | 52 | } // namespace sample 53 | 54 | #endif // TRT_SAMPLE_INFERENCE_H 55 | -------------------------------------------------------------------------------- /einsum/einsum_common7/serialize.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | using std::cerr; 26 | using std::cout; 27 | using std::endl; 28 | 29 | template 30 | inline void serialize_value(void** buffer, T const& value); 31 | 32 | template 33 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value); 34 | 35 | namespace 36 | { 37 | 38 | template 39 | struct Serializer 40 | { 41 | }; 42 | 43 | template 44 | struct Serializer::value || std::is_enum::value || std::is_pod::value>::type> 46 | { 47 | static size_t serialized_size(T const& value) 48 | { 49 | return sizeof(T); 50 | } 51 | static void serialize(void** buffer, T const& value) 52 | { 53 | ::memcpy(*buffer, &value, sizeof(T)); 54 | reinterpret_cast(*buffer) += sizeof(T); 55 | } 56 | static void deserialize(void const** buffer, size_t* buffer_size, T* value) 57 | { 58 | assert(*buffer_size >= sizeof(T)); 59 | ::memcpy(value, *buffer, sizeof(T)); 60 | reinterpret_cast(*buffer) += sizeof(T); 61 | *buffer_size -= sizeof(T); 62 | } 63 | }; 64 | 65 | template <> 66 | struct Serializer 67 | { 68 | static size_t serialized_size(const char* value) 69 | { 70 | return strlen(value) + 1; 71 | } 72 | static void serialize(void** buffer, const char* value) 73 | { 74 | ::strcpy(static_cast(*buffer), value); 75 | reinterpret_cast(*buffer) += strlen(value) + 1; 76 | } 77 | static void deserialize(void const** buffer, size_t* buffer_size, const char** value) 78 | { 79 | *value = static_cast(*buffer); 80 | size_t data_size = strnlen(*value, *buffer_size) + 1; 81 | assert(*buffer_size >= data_size); 82 | reinterpret_cast(*buffer) += data_size; 83 | *buffer_size -= data_size; 84 | } 85 | }; 86 | 87 | template 88 | struct Serializer, 89 | typename std::enable_if::value || std::is_enum::value || std::is_pod::value>::type> 90 | { 91 | static size_t serialized_size(std::vector const& value) 92 | { 93 | return sizeof(value.size()) + value.size() * sizeof(T); 94 | } 95 | 
static void serialize(void** buffer, std::vector const& value) 96 | { 97 | serialize_value(buffer, value.size()); 98 | size_t nbyte = value.size() * sizeof(T); 99 | ::memcpy(*buffer, value.data(), nbyte); 100 | reinterpret_cast(*buffer) += nbyte; 101 | } 102 | static void deserialize(void const** buffer, size_t* buffer_size, std::vector* value) 103 | { 104 | size_t size; 105 | deserialize_value(buffer, buffer_size, &size); 106 | value->resize(size); 107 | size_t nbyte = value->size() * sizeof(T); 108 | assert(*buffer_size >= nbyte); 109 | ::memcpy(value->data(), *buffer, nbyte); 110 | reinterpret_cast(*buffer) += nbyte; 111 | *buffer_size -= nbyte; 112 | } 113 | }; 114 | 115 | } // namespace 116 | 117 | template 118 | inline size_t serialized_size(T const& value) 119 | { 120 | return Serializer::serialized_size(value); 121 | } 122 | 123 | template 124 | inline void serialize_value(void** buffer, T const& value) 125 | { 126 | return Serializer::serialize(buffer, value); 127 | } 128 | 129 | template 130 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) 131 | { 132 | return Serializer::deserialize(buffer, buffer_size, value); 133 | } 134 | -------------------------------------------------------------------------------- /einsum/einsum_common8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kernels) 2 | aux_source_directory(./ DIR_LIB_SRCS) 3 | add_library(einsum_common_lib ${DIR_LIB_SRCS}) -------------------------------------------------------------------------------- /einsum/einsum_common8/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_BBOX_UTILS_H 18 | #define TRT_BBOX_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | template 26 | struct Bbox 27 | { 28 | T xmin, ymin, xmax, ymax; 29 | Bbox(T xmin, T ymin, T xmax, T ymax) 30 | : xmin(xmin) 31 | , ymin(ymin) 32 | , xmax(xmax) 33 | , ymax(ymax) 34 | { 35 | } 36 | Bbox() = default; 37 | }; 38 | 39 | template 40 | struct BboxInfo 41 | { 42 | T conf_score; 43 | int label; 44 | int bbox_idx; 45 | bool kept; 46 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 47 | : conf_score(conf_score) 48 | , label(label) 49 | , bbox_idx(bbox_idx) 50 | , kept(kept) 51 | { 52 | } 53 | BboxInfo() = default; 54 | }; 55 | 56 | template 57 | bool operator<(const Bbox& lhs, const Bbox& rhs) 58 | { 59 | return lhs.x1 < rhs.x1; 60 | } 61 | 62 | template 63 | bool operator==(const Bbox& lhs, const Bbox& rhs) 64 | { 65 | return lhs.x1 == rhs.x1 && lhs.y1 == rhs.y1 && lhs.x2 == rhs.x2 && lhs.y2 == rhs.y2; 66 | } 67 | // }}} 68 | 69 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 70 | 71 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 72 | 73 | size_t dataTypeSize(DataType dtype); 74 | 75 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /einsum/einsum_common8/checkMacrosPlugin.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "checkMacrosPlugin.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvinfer1 23 | { 24 | namespace plugin 25 | { 26 | 27 | // This will be populated by the logger supplied by the user to initLibNvInferPlugins() 28 | ILogger* gLogger{}; 29 | 30 | template 31 | int LogStream::Buf::sync() 32 | { 33 | std::string s = str(); 34 | while (!s.empty() && s.back() == '\n') 35 | { 36 | s.pop_back(); 37 | } 38 | if (gLogger != nullptr) 39 | { 40 | gLogger->log(kSeverity, s.c_str()); 41 | } 42 | str(""); 43 | return 0; 44 | } 45 | 46 | // These use gLogger, and therefore require initLibNvInferPlugins() to be called with a logger 47 | // (otherwise, it will not log) 48 | LogStream gLogError; 49 | LogStream gLogWarning; 50 | LogStream gLogInfo; 51 | LogStream gLogVerbose; 52 | 53 | // break-pointable 54 | void throwCudaError(const char* file, const char* function, int line, int status, const char* msg) 55 | { 56 | CudaError error(file, function, line, status, msg); 57 | error.log(gLogError); 58 | throw error; 59 | } 60 | 61 | // break-pointable 62 | void throwCublasError(const char* file, const char* function, int line, int status, const char* msg) 63 | { 64 | if (msg == nullptr) 65 | { 66 | auto s_ = 
static_cast(status); 67 | switch (s_) 68 | { 69 | case CUBLAS_STATUS_SUCCESS: msg = "CUBLAS_STATUS_SUCCESS"; break; 70 | case CUBLAS_STATUS_NOT_INITIALIZED: msg = "CUBLAS_STATUS_NOT_INITIALIZED"; break; 71 | case CUBLAS_STATUS_ALLOC_FAILED: msg = "CUBLAS_STATUS_ALLOC_FAILED"; break; 72 | case CUBLAS_STATUS_INVALID_VALUE: msg = "CUBLAS_STATUS_INVALID_VALUE"; break; 73 | case CUBLAS_STATUS_ARCH_MISMATCH: msg = "CUBLAS_STATUS_ARCH_MISMATCH"; break; 74 | case CUBLAS_STATUS_MAPPING_ERROR: msg = "CUBLAS_STATUS_MAPPING_ERROR"; break; 75 | case CUBLAS_STATUS_EXECUTION_FAILED: msg = "CUBLAS_STATUS_EXECUTION_FAILED"; break; 76 | case CUBLAS_STATUS_INTERNAL_ERROR: msg = "CUBLAS_STATUS_INTERNAL_ERROR"; break; 77 | case CUBLAS_STATUS_NOT_SUPPORTED: msg = "CUBLAS_STATUS_NOT_SUPPORTED"; break; 78 | case CUBLAS_STATUS_LICENSE_ERROR: msg = "CUBLAS_STATUS_LICENSE_ERROR"; break; 79 | } 80 | } 81 | CublasError error(file, function, line, status, msg); 82 | error.log(gLogError); 83 | throw error; 84 | } 85 | 86 | // break-pointable 87 | void throwCudnnError(const char* file, const char* function, int line, int status, const char* msg) 88 | { 89 | CudnnError error(file, function, line, status, msg); 90 | error.log(gLogError); 91 | throw error; 92 | } 93 | 94 | void logError(const char* msg, const char* file, const char* fn, int line) 95 | { 96 | gLogError << "Parameter check failed at: " << file << "::" << fn << "::" << line; 97 | gLogError << ", condition: " << msg << std::endl; 98 | } 99 | 100 | // break-pointable 101 | void reportAssertion(const char* msg, const char* file, int line) 102 | { 103 | std::ostringstream stream; 104 | stream << "Assertion failed: " << msg << std::endl 105 | << file << ':' << line << std::endl 106 | << "Aborting..." 
<< std::endl; 107 | getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); 108 | cudaDeviceReset(); 109 | abort(); 110 | } 111 | 112 | void TRTException::log(std::ostream& logStream) const 113 | { 114 | logStream << file << " (" << line << ") - " << name << " Error in " << function << ": " << status; 115 | if (message != nullptr) 116 | { 117 | logStream << " (" << message << ")"; 118 | } 119 | logStream << std::endl; 120 | } 121 | 122 | } // namespace plugin 123 | 124 | } // namespace nvinfer1 125 | -------------------------------------------------------------------------------- /einsum/einsum_common8/cub_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | template 19 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments) 20 | { 21 | size_t temp_storage_bytes = 0; 22 | cub::DeviceSegmentedRadixSort::SortPairsDescending((void*) NULL, temp_storage_bytes, (const KeyT*) NULL, 23 | (KeyT*) NULL, (const ValueT*) NULL, (ValueT*) NULL, 24 | num_items, // # items 25 | num_segments, // # segments 26 | (const int*) NULL, (const int*) NULL); 27 | return temp_storage_bytes; 28 | } 29 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | file(GLOB SRCS *.cpp) 17 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS}) 18 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE) 19 | file(GLOB CU_SRCS *.cu) 20 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS}) 21 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE) 22 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/extractFgScores.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "kernel.h" 17 | 18 | template 19 | pluginStatus_t extractFgScores_gpu(cudaStream_t stream, 20 | int N, 21 | int A, 22 | int H, 23 | int W, 24 | const void* scores, 25 | void* fgScores) 26 | { 27 | // Copy all the objectness scores for one batch 28 | size_t size = A * H * W * sizeof(T); 29 | for (int n = 0; n < N; n++) 30 | { 31 | // Find out the starting pointer of the objectness scores in the input 32 | size_t offset_ld = (n * 2 + 1) * A * H * W; 33 | // Find out the starting pointer of the objectness scores in the output 34 | size_t offset_st = n * A * H * W; 35 | CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); 36 | } 37 | 38 | return STATUS_SUCCESS; 39 | } 40 | 41 | template 42 | pluginStatus_t extractFgScores_cpu(int N, 43 | int A, 44 | int H, 45 | int W, 46 | const void* scores, 47 | void* fgScores) 48 | { 49 | size_t size = A * H * W * sizeof(T); 50 | for (int n = 0; n < N; n++) 51 | { 52 | size_t offset_ld = (n * 2 + 1) * A * H * W; 53 | size_t offset_st = n * A * H * W; 54 | memcpy(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size); 55 | } 56 | return STATUS_SUCCESS; 57 | } 58 | 59 | pluginStatus_t extractFgScores(cudaStream_t stream, 60 | const int N, 61 | const int A, 62 | const int H, 63 | const int W, 64 | const DataType t_scores, 65 | const DLayout_t 
l_scores, 66 | const void* scores, 67 | const DataType t_fgScores, 68 | const DLayout_t l_fgScores, 69 | void* fgScores) 70 | { 71 | if (l_fgScores != NCHW || l_scores != NCHW) 72 | return STATUS_BAD_PARAM; 73 | 74 | if (t_fgScores != DataType::kFLOAT) 75 | return STATUS_BAD_PARAM; 76 | 77 | if (t_scores != DataType::kFLOAT) 78 | return STATUS_BAD_PARAM; 79 | 80 | return extractFgScores_gpu(stream, N, A, H, W, scores, fgScores); 81 | } 82 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/generateAnchors.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "kernel.h" 17 | #include 18 | 19 | pluginStatus_t generateAnchors_cpu(int numRatios, 20 | float* ratios, 21 | int numScales, 22 | float* scales, 23 | int baseSize, 24 | float* anchors) 25 | { 26 | #ifdef DEBUG 27 | DEBUG_PRINTF("Generating Anchors with:\n"); 28 | DEBUG_PRINTF("Scales:"); 29 | for (int s = 0; s < numScales; ++s) 30 | { 31 | DEBUG_PRINTF("%f\t", scales[s]); 32 | } 33 | DEBUG_PRINTF("\n"); 34 | DEBUG_PRINTF("Ratios:"); 35 | for (int r = 0; r < numRatios; ++r) 36 | { 37 | DEBUG_PRINTF("%f\t", ratios[r]); 38 | } 39 | DEBUG_PRINTF("\n"); 40 | #endif 41 | 42 | if ((numScales <= 0) || (numRatios <= 0) || (baseSize <= 0)) 43 | { 44 | return STATUS_BAD_PARAM; 45 | } 46 | 47 | // Generate parameters for numRatios * numScales general anchor boxes 48 | for (int r = 0; r < numRatios; ++r) 49 | { 50 | for (int s = 0; s < numScales; ++s) 51 | { 52 | int id = r * numScales + s; 53 | float scale = scales[s]; 54 | float ratio = ratios[r]; 55 | float bs = baseSize; 56 | float ws = round(sqrt((float) (bs * bs) / ratio)); 57 | float hs = round(ws * ratio); 58 | // Width: bs / sqrt(ratio) * scale 59 | // Height: bs * sqrt(ratio) * scale 60 | ws *= scale; 61 | hs *= scale; 62 | 63 | // x_anchor_ctr 64 | /* 65 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 66 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 67 | */ 68 | anchors[id * 4] = (bs - 1) / 2; 69 | // y_anchor_ctr 70 | /* 71 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 72 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 
73 | */ 74 | anchors[id * 4 + 1] = (bs - 1) / 2; 75 | // w_anchor 76 | anchors[id * 4 + 2] = ws; 77 | // h_anchor 78 | anchors[id * 4 + 3] = hs; 79 | } 80 | } 81 | return STATUS_SUCCESS; 82 | } 83 | 84 | pluginStatus_t generateAnchors(cudaStream_t stream, 85 | int numRatios, 86 | float* ratios, 87 | int numScales, 88 | float* scales, 89 | int baseSize, 90 | float* anchors) 91 | { 92 | // Each anchor box has 4 parameters 93 | int ac = numRatios * numScales * 4; 94 | float* anchors_cpu; 95 | cudaMallocHost((void**) &anchors_cpu, sizeof(float) * ac); 96 | pluginStatus_t status = generateAnchors_cpu(numRatios, ratios, numScales, scales, baseSize, anchors_cpu); 97 | cudaMemcpyAsync(anchors, anchors_cpu, sizeof(float) * ac, cudaMemcpyHostToDevice, stream); 98 | cudaFreeHost(anchors_cpu); 99 | return status; 100 | } 101 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | #include "plugin.h" 19 | 20 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 21 | int topK, DataType DT_BBOX, DataType DT_SCORE) 22 | { 23 | size_t wss[7]; 24 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 25 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 26 | wss[2] = detectionForwardPreNMSSize(N, C2); 27 | wss[3] = detectionForwardPreNMSSize(N, C2); 28 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 29 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 30 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 31 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 32 | return calculateTotalWorkspaceSize(wss, 7); 33 | } 34 | 35 | namespace nvinfer1 36 | { 37 | namespace plugin 38 | { 39 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 40 | int topK, DataType DT_BBOX, DataType DT_SCORE) 41 | { 42 | size_t wss[7]; 43 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 44 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 45 | wss[2] = detectionForwardPreNMSSize(N, C2); 46 | wss[3] = detectionForwardPreNMSSize(N, C2); 47 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 48 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 49 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 50 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 51 | return calculateTotalWorkspaceSize(wss, 7); 52 | } 53 | } // namespace plugin 54 | } // namespace nvinfer1 55 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/lReLU.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "kernel.h" 18 | 19 | template 20 | __launch_bounds__(nthdsPerCTA) __global__ 21 | void pReLUKernel(const int n, const float negativeSlope, const float* input, float* output) 22 | { 23 | for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA) 24 | { 25 | output[i] = input[i] > 0 ? input[i] : input[i] * negativeSlope; 26 | } 27 | } 28 | 29 | pluginStatus_t lReLUGPU(cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 30 | { 31 | const int BS = 512; 32 | const int GS = (n + BS - 1) / BS; 33 | pReLUKernel<<>>(n, negativeSlope, 34 | (const float*) input, 35 | (float*) output); 36 | return STATUS_SUCCESS; 37 | } 38 | 39 | pluginStatus_t lReLUInference( 40 | cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 41 | { 42 | return lReLUGPU(stream, n, negativeSlope, (const float*) input, (float*) output); 43 | } 44 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/reducedMathPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef _REDUCED_MATH_PLUGIN_H 18 | #define _REDUCED_MATH_PLUGIN_H 19 | // Dynamically strength-reduced div and mod 20 | // 21 | // Ideas taken from Sean Baxter's MGPU library. 22 | // These classes provide for reduced complexity division and modulus 23 | // on integers, for the case where the same divisor or modulus will 24 | // be used repeatedly. 25 | 26 | namespace nvinfer1 27 | { 28 | namespace plugin 29 | { 30 | namespace detail 31 | { 32 | 33 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff); 34 | 35 | __host__ __device__ __forceinline__ unsigned int umulhi(unsigned int x, unsigned int y) 36 | { 37 | #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 100 38 | return __umulhi(x, y); 39 | #else 40 | unsigned long long z = (unsigned long long) x * (unsigned long long) y; 41 | return (unsigned int) (z >> 32); 42 | #endif 43 | } 44 | 45 | // This is a weird implementation that returns div_up(0,1)=0 but 46 | // div_up(0,2)=1 (wrong) -- just do not use it with a=0. 
47 | __host__ __device__ inline int div_up(int a, int b) 48 | { 49 | return (a - 1) / b + 1; 50 | } 51 | 52 | } // end namespace detail 53 | 54 | class reduced_divisor 55 | { 56 | public: 57 | reduced_divisor() {} 58 | __host__ __forceinline__ reduced_divisor(int _y) 59 | : y(_y) 60 | { 61 | detail::find_divisor(y, mul_coeff, shift_coeff); 62 | } 63 | __host__ __device__ __forceinline__ reduced_divisor(unsigned _mul_coeff, unsigned _shift_coeff, int _y) 64 | : mul_coeff(_mul_coeff) 65 | , shift_coeff(_shift_coeff) 66 | , y(_y) 67 | { 68 | } 69 | __host__ __device__ __forceinline__ int div(int x) const 70 | { 71 | // if dividing by 1, then find_divisor wouldn't have worked because 72 | // mul_coeff would have had to be 2^32, which can't be represented, 73 | // so we have to special case that one. 74 | return (y != 1) ? detail::umulhi((unsigned int) x, mul_coeff) >> shift_coeff : x; 75 | } 76 | __host__ __device__ __forceinline__ int mod(int x) const 77 | { 78 | return x - (div(x) * y); 79 | } 80 | __host__ __device__ __forceinline__ void divmod(int x, int& q, int& mod) const 81 | { 82 | q = div(x); 83 | mod = x - (q * y); 84 | } 85 | __host__ __device__ __forceinline__ int get() const 86 | { 87 | return y; 88 | } 89 | inline __host__ void get_mul_shift(unsigned& mul, unsigned& shift) 90 | { 91 | mul = mul_coeff; 92 | shift = shift_coeff; 93 | } 94 | 95 | protected: 96 | unsigned int mul_coeff; 97 | unsigned int shift_coeff; 98 | int y; 99 | }; 100 | 101 | } // namespace plugin 102 | 103 | } // namespace nvinfer1 104 | #endif /*_REDUCED_MATH_PLUGIN_H*/ 105 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/reorgForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "reducedMathPlugin.h" 17 | #include "kernel.h" 18 | 19 | using namespace nvinfer1::plugin; // for reduced_divisor 20 | 21 | template 22 | __launch_bounds__(nthdsPerCTA) 23 | __global__ void reorgKernel( 24 | const float* input, // input tensor of shape (batch, C, H, W) 25 | const int volume, // note that volumes of input and output tensors are the same 26 | reduced_divisor batch, 27 | reduced_divisor C, 28 | reduced_divisor H, 29 | reduced_divisor W, 30 | reduced_divisor C_out, 31 | reduced_divisor stride, 32 | float* output) // output tensor of shape (batch, C * stride * stride, H / stride, W / stride) 33 | { 34 | /* 35 | * Reference 36 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas_kernels.cu#L370 37 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas.c#L9 38 | */ 39 | 40 | // outIndex is row-major position of input coordinates 41 | for (int outIndex = blockIdx.x * nthdsPerCTA + threadIdx.x; outIndex < volume; outIndex += nthdsPerCTA) 42 | { 43 | int i = outIndex; 44 | 45 | // calculate output coordinates from outIndex 46 | int outW, outH, outC; 47 | W.divmod(i, i, outW); 48 | H.divmod(i, i, outH); 49 | C.divmod(i, i, outC); 50 | int outN = i; 51 | 52 | // calculate input coordinates based on output coordinates 53 | // offset is [0, 1, ..., stride * stride - 1] = posH 
* stride + posW 54 | int offset, inC, posH, posW; 55 | C_out.divmod(outC, offset, inC); 56 | stride.divmod(offset, posH, posW); 57 | int inH = outH * stride.get() + posH; 58 | int inW = outW * stride.get() + posW; 59 | int inN = outN; 60 | 61 | // inIndex is row-major position of input coordinates 62 | int inIndex = inW + W.get() * stride.get() * (inH + H.get() * stride.get() * (inC + C_out.get() * inN)); 63 | 64 | output[outIndex] = input[inIndex]; 65 | } 66 | } 67 | 68 | pluginStatus_t reorgGPU( 69 | cudaStream_t stream, 70 | const int batch, 71 | const int C, 72 | const int H, 73 | const int W, 74 | const int stride, 75 | const float* input, 76 | float* output) 77 | { 78 | const int BS = 512; // number of threads in one block 79 | const int volume = batch * C * H * W; // size of input tensor 80 | const int GS = (volume + BS - 1) / BS; // number of blocks to launch, calculated so global number of threads is >= volume 81 | 82 | reduced_divisor C_out(C / (stride * stride)); 83 | reorgKernel<<>>(input, volume, reduced_divisor(batch), reduced_divisor(C), reduced_divisor(H), reduced_divisor(W), C_out, reduced_divisor(stride), output); 84 | return STATUS_SUCCESS; 85 | } 86 | 87 | pluginStatus_t reorgInference( 88 | cudaStream_t stream, 89 | const int batch, 90 | const int C, 91 | const int H, 92 | const int W, 93 | const int stride, 94 | const void* input, 95 | void* output) 96 | { 97 | return reorgGPU(stream, batch, C, H, W, stride, (const float*) input, (float*) output); 98 | } 99 | -------------------------------------------------------------------------------- /einsum/einsum_common8/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "logger.h" 18 | #include "ErrorRecorder.h" 19 | #include "logging.h" 20 | 21 | SampleErrorRecorder gRecorder; 22 | namespace sample 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | 31 | void setReportableSeverity(Logger::Severity severity) 32 | { 33 | gLogger.setReportableSeverity(severity); 34 | gLogVerbose.setReportableSeverity(severity); 35 | gLogInfo.setReportableSeverity(severity); 36 | gLogWarning.setReportableSeverity(severity); 37 | gLogError.setReportableSeverity(severity); 38 | gLogFatal.setReportableSeverity(severity); 39 | } 40 | } // namespace sample 41 | -------------------------------------------------------------------------------- /einsum/einsum_common8/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef LOGGER_H 18 | #define LOGGER_H 19 | 20 | #include "logging.h" 21 | 22 | class SampleErrorRecorder; 23 | extern SampleErrorRecorder gRecorder; 24 | namespace sample 25 | { 26 | extern Logger gLogger; 27 | extern LogStreamConsumer gLogVerbose; 28 | extern LogStreamConsumer gLogInfo; 29 | extern LogStreamConsumer gLogWarning; 30 | extern LogStreamConsumer gLogError; 31 | extern LogStreamConsumer gLogFatal; 32 | 33 | void setReportableSeverity(Logger::Severity severity); 34 | } // namespace sample 35 | 36 | #endif // LOGGER_H 37 | -------------------------------------------------------------------------------- /einsum/einsum_common8/nmsHelper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "plugin.h" 18 | #include "cuda_fp16.h" 19 | #include 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX) 25 | { 26 | if (DT_BBOX == DataType::kFLOAT) 27 | { 28 | return N * C1 * sizeof(float); 29 | } 30 | if (DT_BBOX == DataType::kHALF) 31 | { 32 | return N * C1 * sizeof(__half); 33 | } 34 | 35 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 36 | return (size_t) -1; 37 | } 38 | 39 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX) 40 | { 41 | if (DT_BBOX == DataType::kFLOAT) 42 | { 43 | return shareLocation ? 0 : N * C1 * sizeof(float); 44 | } 45 | if (DT_BBOX == DataType::kHALF) 46 | { 47 | return shareLocation ? 0 : N * C1 * sizeof(__half); 48 | } 49 | 50 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 51 | return (size_t) -1; 52 | } 53 | 54 | size_t detectionForwardPreNMSSize(int N, int C2) 55 | { 56 | ASSERT(sizeof(float) == sizeof(int)); 57 | return N * C2 * sizeof(float); 58 | } 59 | 60 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK) 61 | { 62 | ASSERT(sizeof(float) == sizeof(int)); 63 | return N * numClasses * topK * sizeof(float); 64 | } 65 | -------------------------------------------------------------------------------- /einsum/einsum_common8/nmsUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_NMS_UTILS_H 18 | #define TRT_NMS_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 26 | int topK, DataType DT_BBOX, DataType DT_SCORE); 27 | #endif 28 | -------------------------------------------------------------------------------- /einsum/einsum_common8/parserOnnxConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef PARSER_ONNX_CONFIG_H 18 | #define PARSER_ONNX_CONFIG_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "NvInfer.h" 25 | #include "NvOnnxConfig.h" 26 | #include "NvOnnxParser.h" 27 | 28 | #define ONNX_DEBUG 1 29 | 30 | /** 31 | * \class ParserOnnxConfig 32 | * \brief Configuration Manager Class Concrete Implementation 33 | * 34 | * \note: 35 | * 36 | */ 37 | 38 | using namespace std; 39 | 40 | class ParserOnnxConfig : public nvonnxparser::IOnnxConfig 41 | { 42 | 43 | protected: 44 | string mModelFilename{}; 45 | string mTextFilename{}; 46 | string mFullTextFilename{}; 47 | nvinfer1::DataType mModelDtype; 48 | nvonnxparser::IOnnxConfig::Verbosity mVerbosity; 49 | bool mPrintLayercInfo; 50 | 51 | public: 52 | ParserOnnxConfig() 53 | : mModelDtype(nvinfer1::DataType::kFLOAT) 54 | , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) 55 | , mPrintLayercInfo(false) 56 | { 57 | #ifdef ONNX_DEBUG 58 | if (isDebug()) 59 | { 60 | std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; 61 | } 62 | #endif 63 | } 64 | 65 | protected: 66 | ~ParserOnnxConfig() 67 | { 68 | #ifdef ONNX_DEBUG 69 | if (isDebug()) 70 | { 71 | std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; 72 | } 73 | #endif 74 | } 75 | 76 | public: 77 | virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept 78 | { 79 | mModelDtype = modelDtype; 80 | } 81 | 82 | virtual nvinfer1::DataType getModelDtype() const noexcept 83 | { 84 | return mModelDtype; 85 | } 86 | 87 | virtual const char* getModelFileName() const noexcept 88 | { 89 | return mModelFilename.c_str(); 90 | } 91 | virtual void setModelFileName(const char* onnxFilename) noexcept 92 | { 93 | mModelFilename = string(onnxFilename); 94 | } 95 | virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept 96 | { 97 | return mVerbosity; 98 | } 99 | virtual void addVerbosity() noexcept 100 | { 101 | ++mVerbosity; 102 | } 103 | virtual void 
reduceVerbosity() noexcept 104 | { 105 | --mVerbosity; 106 | } 107 | virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept 108 | { 109 | mVerbosity = verbosity; 110 | } 111 | 112 | virtual const char* getTextFileName() const noexcept 113 | { 114 | return mTextFilename.c_str(); 115 | } 116 | virtual void setTextFileName(const char* textFilename) noexcept 117 | { 118 | mTextFilename = string(textFilename); 119 | } 120 | virtual const char* getFullTextFileName() const noexcept 121 | { 122 | return mFullTextFilename.c_str(); 123 | } 124 | virtual void setFullTextFileName(const char* fullTextFilename) noexcept 125 | { 126 | mFullTextFilename = string(fullTextFilename); 127 | } 128 | virtual bool getPrintLayerInfo() const noexcept 129 | { 130 | return mPrintLayercInfo; 131 | } 132 | virtual void setPrintLayerInfo(bool src) noexcept 133 | { 134 | mPrintLayercInfo = src; 135 | } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() 136 | 137 | virtual bool isDebug() const noexcept 138 | { 139 | #if ONNX_DEBUG 140 | return (std::getenv("ONNX_DEBUG") ? true : false); 141 | #else 142 | return false; 143 | #endif 144 | } 145 | 146 | virtual void destroy() noexcept 147 | { 148 | delete this; 149 | } 150 | 151 | }; // class ParserOnnxConfig 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /einsum/einsum_common8/pluginLogger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef PLUGIN_LOGGER_H 18 | #define PLUGIN_LOGGER_H 19 | 20 | #include "pluginLogging.h" 21 | 22 | namespace 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | } // namespace 31 | 32 | #endif // PLUGIN_LOGGER_H 33 | -------------------------------------------------------------------------------- /einsum/einsum_common8/reducedMathPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | namespace nvinfer1 18 | { 19 | namespace plugin 20 | { 21 | namespace detail 22 | { 23 | 24 | // Count leading zeros - start from most significant bit. 25 | int clz(int x) 26 | { 27 | for (int i = 31; i >= 0; --i) 28 | { 29 | if ((1 << i) & x) 30 | { 31 | return 31 - i; 32 | } 33 | } 34 | return 32; 35 | } 36 | 37 | #define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) 38 | 39 | int find_log_2(int x, bool round_up = false) 40 | { 41 | int a = 31 - clz(x); 42 | if (round_up) 43 | { 44 | a += !CUDNN_IS_POW_2(x); 45 | } 46 | return a; 47 | } 48 | 49 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff) 50 | { 51 | if (denom == 0) 52 | { 53 | return; 54 | } 55 | if (denom == 1) 56 | { 57 | // if dividing by 1, reduced math doesn't work because mul_coeff would 58 | // need to be 2^32, which doesn't fit into unsigned int. the div() 59 | // routine handles this special case separately. 60 | mul_coeff = 0; 61 | shift_coeff = 0; 62 | return; 63 | } 64 | // To express the division N/D in terms of a multiplication, what we first 65 | // imagine is simply N*(1/D). However, 1/D will always evaluate to 0 (for D>1), 66 | // so we need another way. There's nothing that says we have to use exactly 67 | // the fraction 1/D; instead it could be any X/Y that reduces to 1/D (i.e., 68 | // Y=X*D), or at least to "close enough" to it. If we pick Y that is a power 69 | // of two, then the N*(X/Y) can be N*X followed by a right-shift by some amount. 70 | // The power of two we should pick should be at least 2^32, because in the 71 | // div() routine we'll use umulhi(), which returns only the upper 32 bits -- 72 | // this being equivalent to a right-shift by 32. But we might want a higher 73 | // power of two for better accuracy depending on the magnitude of the denominator. 
74 | // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, 75 | // and we save shift_coeff as whatever further shift we have to do beyond 76 | // what the umulhi() implies. 77 | unsigned int p = 31 + find_log_2(denom, true); 78 | unsigned int m = ((1ull << p) + (unsigned int) denom - 1) / (unsigned int) denom; 79 | mul_coeff = m; 80 | shift_coeff = p - 32; 81 | } 82 | 83 | } // namespace detail 84 | 85 | } // namespace plugin 86 | 87 | } // namespace nvinfer1 88 | -------------------------------------------------------------------------------- /einsum/einsum_common8/sampleInference.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_INFERENCE_H 18 | #define TRT_SAMPLE_INFERENCE_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "NvInfer.h" 26 | 27 | #include "sampleReporting.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct InferenceEnvironment 34 | { 35 | TrtUniquePtr engine; 36 | std::unique_ptr profiler; 37 | std::vector> context; 38 | std::vector> bindings; 39 | bool error{false}; 40 | }; 41 | 42 | //! 43 | //! \brief Set up contexts and bindings for inference 44 | //! 
45 | bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); 46 | 47 | //! 48 | //! \brief Deserialize the engine and time how long it takes. 49 | //! 50 | bool timeDeserialize(InferenceEnvironment& iEnv); 51 | 52 | //! 53 | //! \brief Run inference and collect timing, return false if any error hit during inference 54 | //! 55 | bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv, int device, std::vector& trace); 56 | 57 | } // namespace sample 58 | 59 | #endif // TRT_SAMPLE_INFERENCE_H 60 | -------------------------------------------------------------------------------- /einsum/einsum_common8/serialize.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | using std::cerr; 26 | using std::cout; 27 | using std::endl; 28 | 29 | template 30 | inline void serialize_value(void** buffer, T const& value); 31 | 32 | template 33 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value); 34 | 35 | namespace 36 | { 37 | 38 | template 39 | struct Serializer 40 | { 41 | }; 42 | 43 | template 44 | struct Serializer::value || std::is_enum::value || std::is_pod::value>::type> 46 | { 47 | static size_t serialized_size(T const& value) 48 | { 49 | return sizeof(T); 50 | } 51 | static void serialize(void** buffer, T const& value) 52 | { 53 | ::memcpy(*buffer, &value, sizeof(T)); 54 | reinterpret_cast(*buffer) += sizeof(T); 55 | } 56 | static void deserialize(void const** buffer, size_t* buffer_size, T* value) 57 | { 58 | assert(*buffer_size >= sizeof(T)); 59 | ::memcpy(value, *buffer, sizeof(T)); 60 | reinterpret_cast(*buffer) += sizeof(T); 61 | *buffer_size -= sizeof(T); 62 | } 63 | }; 64 | 65 | template <> 66 | struct Serializer 67 | { 68 | static size_t serialized_size(const char* value) 69 | { 70 | return strlen(value) + 1; 71 | } 72 | static void serialize(void** buffer, const char* value) 73 | { 74 | ::strcpy(static_cast(*buffer), value); 75 | reinterpret_cast(*buffer) += strlen(value) + 1; 76 | } 77 | static void deserialize(void const** buffer, size_t* buffer_size, const char** value) 78 | { 79 | *value = static_cast(*buffer); 80 | size_t data_size = strnlen(*value, *buffer_size) + 1; 81 | assert(*buffer_size >= data_size); 82 | reinterpret_cast(*buffer) += data_size; 83 | *buffer_size -= data_size; 84 | } 85 | }; 86 | 87 | template 88 | struct Serializer, 89 | typename std::enable_if::value || std::is_enum::value || std::is_pod::value>::type> 90 | { 91 | static size_t serialized_size(std::vector const& value) 92 | { 93 | return sizeof(value.size()) + value.size() * sizeof(T); 94 | } 95 | 
static void serialize(void** buffer, std::vector const& value) 96 | { 97 | serialize_value(buffer, value.size()); 98 | size_t nbyte = value.size() * sizeof(T); 99 | ::memcpy(*buffer, value.data(), nbyte); 100 | reinterpret_cast(*buffer) += nbyte; 101 | } 102 | static void deserialize(void const** buffer, size_t* buffer_size, std::vector* value) 103 | { 104 | size_t size; 105 | deserialize_value(buffer, buffer_size, &size); 106 | value->resize(size); 107 | size_t nbyte = value->size() * sizeof(T); 108 | assert(*buffer_size >= nbyte); 109 | ::memcpy(value->data(), *buffer, nbyte); 110 | reinterpret_cast(*buffer) += nbyte; 111 | *buffer_size -= nbyte; 112 | } 113 | }; 114 | 115 | } // namespace 116 | 117 | template 118 | inline size_t serialized_size(T const& value) 119 | { 120 | return Serializer::serialized_size(value); 121 | } 122 | 123 | template 124 | inline void serialize_value(void** buffer, T const& value) 125 | { 126 | return Serializer::serialize(buffer, value); 127 | } 128 | 129 | template 130 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) 131 | { 132 | return Serializer::deserialize(buffer, buffer_size, value); 133 | } 134 | -------------------------------------------------------------------------------- /function/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | AUX_SOURCE_DIRECTORY(./ DIR_LIB_SRCS) 2 | #SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) 3 | ADD_LIBRARY(functionlib ${DIR_LIB_SRCS}) 4 | -------------------------------------------------------------------------------- /function/TrtInfer.cpp: -------------------------------------------------------------------------------- 1 | #include "TrtInfer.h" 2 | void allocate_buffers(std::unique_ptr &engine,int max_batch_size, 3 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers) 4 | { 5 | // 3.2 分配输入、输出内存(cpu+gpu) 6 | int NbBindings = engine->getNbBindings(); // number of 
input+output 7 | // void* buffers[NbBindings]; // initialize buffers(for gpu data) 8 | 9 | for (int i = 0; i < NbBindings; i++) 10 | { 11 | auto dims = engine->getBindingDimensions(i); 12 | size_t vol = static_cast(max_batch_size); 13 | DataType type = engine->getBindingDataType(i); 14 | vol *= samplesCommon::volume(dims); 15 | size_t size_binding = vol * samplesCommon::getElementSize(type); 16 | 17 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 18 | std::vector temp_data(vol); 19 | bool is_input = engine->bindingIsInput(i); 20 | if(is_input){ // 分配 21 | inputIndex.push_back(i); 22 | input_cpu_data.push_back(temp_data); // 创建cpu输入 23 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 24 | } 25 | else { 26 | output_cpu_data.push_back(temp_data); 27 | output_cpu_data.size.push_back(size_binding); 28 | } 29 | } 30 | return; 31 | } 32 | void trt_infer(cudaStream_t &stream,std::unique_ptr &context,int max_batch_size, 33 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers) 34 | { 35 | auto start_time = std::chrono::system_clock::now(); 36 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 40 | context->execute(max_batch_size,buffers); 41 | for(int i=0; isize();i++){ 42 | cudaMemcpyAsync(output_cpu_data[i].data(), buffers[i+inputIndex.size()], output_cpu_data.size[i], cudaMemcpyDeviceToHost, stream); 43 | } 44 | cudaStreamSynchronize(stream); 45 | auto end_time = std::chrono::system_clock::now(); 46 | std::cout << "infer time: " << std::chrono::duration_cast(end_time - start_time).count() << "ms" << std::endl; 47 | } 48 | -------------------------------------------------------------------------------- /function/function.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | using namespace std; 3 | using namespace nvinfer1; 4 | Logger gLogger; 5 | std::vector> reshape_1to2D(std::vector shape,std::vector data){ 6 | std::vector> 
output(shape[0]); 7 | for(int i=0; i> arrays,int max_lengths){ 18 | for(int i = 0; i < arrays.size() && i engineData(fsize); 39 | engineFile.read(engineData.data(), fsize); 40 | if (!engineFile) 41 | { 42 | std::cout << "Error loading engine file: " << engine << std::endl; 43 | return nullptr; 44 | } 45 | 46 | std::unique_ptr runtime(createInferRuntime(gLogger)); 47 | 48 | return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr); 49 | } 50 | -------------------------------------------------------------------------------- /function/image.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "image.hpp" 3 | 4 | static const float kMean[3] = { 0.485f, 0.456f, 0.406f }; 5 | static const float kStdDev[3] = { 0.229f, 0.224f, 0.225f }; 6 | static const int map_[7][3] = { {0,0,0} , 7 | {128,0,0}, 8 | {0,128,0}, 9 | {0,0,128}, 10 | {128,128,0}, 11 | {128,0,128}, 12 | {0,128,0}}; 13 | 14 | 15 | float* normal(cv::Mat img) { 16 | //将cv::Mat格式的图片,转换成一维float向量 17 | float * data = (float*)calloc(img.rows*img.cols * img.channels(), sizeof(float)); 18 | // printf("image channel %d\n",img.channels()); 19 | if(img.channels()==3){ 20 | for (int c = 0; c < 3; ++c) 21 | { 22 | for (int i = 0; i < img.rows; ++i) 23 | { //获取第i行首像素指针 24 | cv::Vec3b *p1 = img.ptr(i); 25 | for (int j = 0; j < img.cols; ++j) 26 | { 27 | data[c * img.cols * img.rows + i * img.cols + j] = (p1[j][c] / 255.0f - kMean[c]) / kStdDev[c]; 28 | } 29 | } 30 | } 31 | } 32 | else if(img.channels()==1){ 33 | for (int c = 0; c < 1; ++c) 34 | { 35 | for (int i = 0; i < img.rows; ++i) 36 | { //获取第i行首像素指针 37 | cv::Vec *p1 = img.ptr>(i); 38 | for (int j = 0; j < img.cols; ++j) 39 | { 40 | data[c * img.cols * img.rows + i * img.cols + j] = p1[j][c]; 41 | } 42 | } 43 | } 44 | } 45 | else{ 46 | printf("!!!!!!!!!!!!!!!!!!图片输入错误\n"); 47 | } 48 | return data; 49 | } 50 | -------------------------------------------------------------------------------- 
/gcn.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/gcn.onnx -------------------------------------------------------------------------------- /generate_onnx.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import onnx 6 | 7 | class model(nn.Module): 8 | def __init__(self, in_channel=1): 9 | super().__init__() 10 | self.A = np.ones(shape=(3, 15, 15)) 11 | self.A = torch.tensor(self.A, dtype=torch.float32, requires_grad=False) 12 | 13 | def forward(self, x: torch.Tensor): 14 | x = x.permute(0, 2, 3, 1, 4).contiguous() 15 | x = torch.einsum('nctkv,kvw->nctw', x, self.A) 16 | return x 17 | 18 | input_tensor = torch.ones(size=[1, 3, 1, 1, 15]) 19 | Model = model() 20 | Model.cuda() 21 | out = Model(input_tensor) 22 | 23 | input_name = ['input1'] # , 'input2'] 24 | output_name = ['output1'] # , 'output2'] # 必须要有输入输出 25 | torch.onnx.export(Model, 26 | input_tensor, 27 | './gcn.onnx', 28 | input_names=input_name, output_names=output_name, 29 | verbose=True, 30 | opset_version=12 31 | ) 32 | model = onnx.load('./gcn.onnx') 33 | print(onnx.checker.check_model(model)) 34 | print(out) 35 | print(out.size()) 36 | print(out.sum()) 37 | 38 | -------------------------------------------------------------------------------- /include/Head.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "function.h" 3 | #include "TrtInfer.h" 4 | -------------------------------------------------------------------------------- /include/TrtInfer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "function.h" 3 | void allocate_buffers(std::unique_ptr &engine,int max_batch_size, 4 | std::vector 
&inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers); 5 | void trt_infer(cudaStream_t &stream,std::unique_ptr &context,int max_batch_size, 6 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers); 7 | -------------------------------------------------------------------------------- /include/function.h: -------------------------------------------------------------------------------- 1 | //#ifndef FUNCTION_H 2 | //#define FUNCTION_H 3 | #pragma once 4 | #include 5 | #include "NvInferRuntime.h" 6 | #include "NvInferRuntimeCommon.h" 7 | #include "NvOnnxConfig.h" 8 | #include "NvOnnxParser.h" 9 | #include "NvUtils.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "buffers.h" 20 | #include "image.hpp" 21 | 22 | class CPU_data :public std::vector> 23 | { 24 | public: 25 | CPU_data() {} 26 | std::vector size; // 记录每个数据的内存占用字节数 *sizeof(float) 27 | }; 28 | 29 | using namespace nvinfer1; 30 | class Logger : public ILogger 31 | { 32 | void log(Severity severity, const char* msg) throw() override 33 | { 34 | // suppress info-level messages 35 | if (severity != Severity::kINFO) 36 | std::cout << msg << std::endl; 37 | } 38 | }; 39 | 40 | extern Logger gLogger; 41 | struct InferDeleter 42 | { 43 | template 44 | void operator()(T* obj) const{ 45 | if (obj) 46 | { 47 | obj->destroy(); 48 | } 49 | } 50 | }; 51 | 52 | struct CudaDeleter 53 | { 54 | void operator()(void* obj){ 55 | if (obj) 56 | { 57 | cudaFree(obj); 58 | } 59 | } 60 | }; 61 | //constexpr long double operator"" _GiB(long double val) 62 | //{ 63 | // return val * (1 << 30); 64 | //} 65 | //constexpr long double operator"" _MiB(long double val) 66 | //{ 67 | // return val * (1 << 20); 68 | //} 69 | //constexpr long double operator"" _KiB(long double val) 70 | //{ 71 | // return val * (1 << 10); 72 | //} 73 | std::vector> reshape_1to2D(std::vector shape,std::vector data); 74 
| void printfVector2D(std::vector> arrays,int max_lengths); 75 | 76 | ICudaEngine* loadEngine(const std::string& engine); 77 | 78 | //#endif // FUNCTION_H 79 | -------------------------------------------------------------------------------- /include/image.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | typedef struct { 3 | int w; 4 | int h; 5 | int c; 6 | float *data; 7 | } image; 8 | float* normal(cv::Mat img); 9 | -------------------------------------------------------------------------------- /infer_test.cpp: -------------------------------------------------------------------------------- 1 | #include "Head.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 7 | std::string engine_path = "../data/resnet50.engine"; 8 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 9 | 10 | // 0. 参数 11 | int max_batch_size = 1; 12 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 13 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 14 | //std::vector> OUTPUT_SIZE{{1,1000}}; 15 | 16 | int main() 17 | { 18 | std::unique_ptr engine(loadEngine(engine_path)); 19 | 20 | // 5. 生成context 21 | std::unique_ptr context(engine->createExecutionContext()); 22 | // 3. 
推理 23 | // 3.1 构建stream 24 | cudaStream_t stream; 25 | cudaStreamCreate(&stream); 26 | 27 | // 3.2 分配输入、输出内存(cpu+gpu) 28 | std::vector inputIndex; // 输入索引 29 | CPU_data input_cpu_data, output_cpu_data; 30 | int NbBindings = engine->getNbBindings(); // number of input+output 31 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 32 | 33 | for (int i = 0; i < NbBindings; i++) 34 | { 35 | auto dims = engine->getBindingDimensions(i); 36 | size_t vol = static_cast(max_batch_size); 37 | DataType type = engine->getBindingDataType(i); 38 | vol *= samplesCommon::volume(dims); 39 | size_t size_binding = vol * samplesCommon::getElementSize(type); 40 | 41 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 42 | vector temp_data(vol); 43 | bool is_input = engine->bindingIsInput(i); 44 | if(is_input){ // 分配 45 | inputIndex.push_back(i); 46 | input_cpu_data.push_back(temp_data); // 创建cpu输入 47 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 48 | } 49 | else { 50 | output_cpu_data.push_back(temp_data); 51 | output_cpu_data.size.push_back(size_binding); 52 | } 53 | } 54 | // 3.3 加载输入 55 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 56 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 57 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 58 | cv::resize(img,dst, dst.size()); 59 | float* fileData=normal(dst); 60 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 61 | input_cpu_data[0][i] = fileData[i]; 62 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? 
"" : "\n"); 63 | } 64 | free(fileData); // 释放图片 65 | for(int i = 0; i <2;i++){ // 第一次启动GPU,会消耗很长时间,第二次就正常速度了 66 | auto start_time = std::chrono::system_clock::now(); 67 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 71 | context->execute(max_batch_size,buffers); 72 | for(int i=0; i(end_time - start_time).count() << "ms" << std::endl; 78 | } 79 | cudaStreamDestroy(stream); 80 | 81 | for(int i = 0; i < OUTPUT_SIZE.size(); ++i){ 82 | std::vector> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 83 | printfVector2D(a,10); 84 | } 85 | 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /mytest.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/mytest.cpp -------------------------------------------------------------------------------- /resnet50.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | const char* onnxModelFile = "../data/resnet50.onnx"; 7 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 8 | 9 | int main() 10 | { 11 | std::unique_ptr builder(createInferBuilder(gLogger)); 12 | builder->setMaxBatchSize(1); 13 | 14 | std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 15 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 16 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 17 | 18 | std::unique_ptr config(builder->createBuilderConfig()); 19 | config->setMaxWorkspaceSize(1_GiB); 20 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 21 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 22 | 23 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 24 | std::unique_ptr 
context(engine->createExecutionContext()); 25 | 26 | // 0. 参数 27 | int max_batch_size = 1; 28 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 29 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 30 | 31 | // 3. 推理 32 | // 3.1 构建stream 33 | cudaStream_t stream; 34 | cudaStreamCreate(&stream); 35 | 36 | // 3.2 分配输入、输出内存(cpu+gpu) 37 | std::vector inputIndex; // 输入索引 38 | CPU_data input_cpu_data, output_cpu_data; 39 | int NbBindings = engine->getNbBindings(); // number of input+output 40 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 41 | 42 | for (int i = 0; i < NbBindings; i++) 43 | { 44 | auto dims = engine->getBindingDimensions(i); 45 | size_t vol = static_cast(max_batch_size); 46 | DataType type = engine->getBindingDataType(i); 47 | vol *= samplesCommon::volume(dims); 48 | size_t size_binding = vol * samplesCommon::getElementSize(type); 49 | 50 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 51 | vector temp_data(vol); 52 | bool is_input = engine->bindingIsInput(i); 53 | if(is_input){ // 分配 54 | inputIndex.push_back(i); 55 | input_cpu_data.push_back(temp_data); // 创建cpu输入 56 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 57 | } 58 | else { 59 | output_cpu_data.push_back(temp_data); 60 | output_cpu_data.size.push_back(size_binding); 61 | } 62 | } 63 | // 3.3 加载输入 64 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 65 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 66 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 67 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 68 | cv::resize(img,dst, dst.size()); 69 | float* fileData=normal(dst); 70 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 71 | input_cpu_data[0][i] = fileData[i]; 72 | } 73 | free(fileData); // 释放图片 74 | // 3.2 输入从cpu拷贝至gpu 75 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 80 | if(is_success) 81 | std::cout << "Forward success !" << std::endl; 82 | else 83 | std::cout << "Forward Error !" 
<< std::endl; 84 | // 3.4 输出从gpu拷贝至cpu 85 | for(int i=0; i> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 95 | printfVector2D(a,10); 96 | } 97 | 98 | return 1; 99 | } 100 | -------------------------------------------------------------------------------- /sample.cpp: -------------------------------------------------------------------------------- 1 | #include "Head.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 7 | std::string engine_path = "../data/resnet50.engine"; 8 | // 0. 参数 9 | int max_batch_size = 1; 10 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 11 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 12 | //std::vector> OUTPUT_SIZE{{1,1000}}; 13 | 14 | int main() 15 | { 16 | std::unique_ptr engine(loadEngine(engine_path)); 17 | std::unique_ptr context(engine->createExecutionContext()); 18 | 19 | // 3.1 构建stream 20 | cudaStream_t stream; 21 | cudaStreamCreate(&stream); 22 | 23 | // 3.2 分配输入、输出内存(cpu+gpu) 24 | std::vector inputIndex; // 输入索引 25 | CPU_data input_cpu_data, output_cpu_data; 26 | int NbBindings = engine->getNbBindings(); // number of input+output 27 | void* buffers[NbBindings]; 28 | allocate_buffers(engine,max_batch_size,inputIndex,input_cpu_data,output_cpu_data,buffers); // initialize buffers(for gpu data) 29 | 30 | // 3.3 加载输入 31 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 32 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 33 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 34 | cv::resize(img,dst, dst.size()); 35 | float* fileData=normal(dst); 36 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 37 | input_cpu_data[0][i] = fileData[i]; 38 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? 
"" : "\n"); 39 | } 40 | free(fileData); // 释放图片 41 | // infer 42 | for(int i = 0; i <2;i++){ // 第一次启动GPU,会消耗很长时间,第二次就正常速度了 43 | trt_infer(stream,context,max_batch_size,inputIndex,input_cpu_data,output_cpu_data,buffers); 44 | } 45 | 46 | cudaStreamDestroy(stream); 47 | //output postprogress 48 | for(int i = 0; i < OUTPUT_SIZE.size(); ++i){ 49 | std::vector> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 50 | printfVector2D(a,10); 51 | } 52 | 53 | return 1; 54 | } 55 | -------------------------------------------------------------------------------- /testNet.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | const char* onnxModelFile = "../data/test.onnx"; 7 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 8 | 9 | int main() 10 | { 11 | std::unique_ptr builder(createInferBuilder(gLogger)); 12 | builder->setMaxBatchSize(1); 13 | 14 | std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 15 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 16 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 17 | 18 | std::unique_ptr config(builder->createBuilderConfig()); 19 | config->setMaxWorkspaceSize(1_GiB); 20 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 21 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 22 | 23 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 24 | std::unique_ptr context(engine->createExecutionContext()); 25 | 26 | // 0. 参数 27 | int max_batch_size = 1; 28 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 29 | std::vector> OUTPUT_SIZE{{4,4},{1,5}}; // for reshape output 30 | 31 | // 3. 
推理 32 | // 3.1 构建stream 33 | cudaStream_t stream; 34 | cudaStreamCreate(&stream); 35 | 36 | // 3.2 分配输入、输出内存(cpu+gpu) 37 | std::vector inputIndex; // 输入索引 38 | CPU_data input_cpu_data, output_cpu_data; 39 | int NbBindings = engine->getNbBindings(); // number of input+output 40 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 41 | 42 | for (int i = 0; i < NbBindings; i++) 43 | { 44 | auto dims = engine->getBindingDimensions(i); 45 | size_t vol = static_cast(max_batch_size); 46 | DataType type = engine->getBindingDataType(i); 47 | vol *= samplesCommon::volume(dims); 48 | size_t size_binding = vol * samplesCommon::getElementSize(type); 49 | 50 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 51 | vector temp_data(vol); 52 | bool is_input = engine->bindingIsInput(i); 53 | if(is_input){ // 分配 54 | inputIndex.push_back(i); 55 | input_cpu_data.push_back(temp_data); // 创建cpu输入 56 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 57 | } 58 | else { 59 | output_cpu_data.push_back(temp_data); 60 | output_cpu_data.size.push_back(size_binding); 61 | } 62 | } 63 | // 3.3 加载输入 64 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 65 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 66 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 67 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 68 | cv::resize(img,dst, dst.size()); 69 | float* fileData=normal(dst); 70 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 71 | input_cpu_data[0][i] = fileData[i]; 72 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? "" : "\n"); 73 | } 74 | free(fileData); // 释放图片 75 | // 3.2 输入从cpu拷贝至gpu 76 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 81 | if(is_success) 82 | std::cout << "Forward success !" << std::endl; 83 | else 84 | std::cout << "Forward Error !" 
<< std::endl; 85 | // 3.4 输出从gpu拷贝至cpu 86 | for(int i=0; i> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 95 | printfVector2D(a,10); 96 | } 97 | 98 | return 1; 99 | } 100 | --------------------------------------------------------------------------------