├── .gitignore ├── .vscode └── launch.json ├── CMakeLists.txt ├── CMakeLists.txt.user ├── README.md ├── SourceMnist.cpp ├── cublasTest.cpp ├── data └── tabby_tiger_cat.jpg ├── einsum ├── CMakeLists.txt ├── Einsum.cpp ├── Einsum.h ├── build │ ├── .cmake │ │ └── api │ │ │ └── v1 │ │ │ ├── query │ │ │ └── client-vscode │ │ │ │ └── query.json │ │ │ └── reply │ │ │ ├── cache-v2-8b57e125de1d3e63be5c.json │ │ │ ├── codemodel-v2-91dcb8fb09f012c6e06f.json │ │ │ ├── index-2021-06-27T04-48-55-0999.json │ │ │ ├── target-einsum_common8_lib-Debug-3718d237bae652e49c1b.json │ │ │ ├── target-einsumlib-Debug-0f07c3f3305b026cdd92.json │ │ │ └── toolchains-v1-7e03fe365e7edf64e679.json │ ├── CMakeCache.txt │ ├── CMakeFiles │ │ ├── 3.20.5 │ │ │ ├── CMakeCCompiler.cmake │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ ├── CMakeSystem.cmake │ │ │ ├── CompilerIdC │ │ │ │ ├── CMakeCCompilerId.c │ │ │ │ └── a.out │ │ │ └── CompilerIdCXX │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ └── a.out │ │ ├── CMakeDirectoryInformation.cmake │ │ ├── CMakeError.log │ │ ├── CMakeOutput.log │ │ ├── Progress │ │ │ ├── 2 │ │ │ ├── 5 │ │ │ └── count.txt │ │ ├── TargetDirectories.txt │ │ ├── cmake.check_cache │ │ ├── einsum.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── cmake_clean_target.cmake │ │ │ ├── compiler_depend.internal │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ └── progress.make │ │ ├── einsumlib.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── cmake_clean_target.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ └── progress.make │ │ ├── progress.marks │ │ └── test.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── Einsum.cpp.o.d │ │ │ ├── 
build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── onnx2trt_gcn.cpp.o.d │ │ │ └── progress.make │ ├── cmake_install.cmake │ ├── common │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── commonlib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── cudaDriverWrapper.cpp.o.d │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── nmsHelper.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ │ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ ├── compile_commands.json │ ├── einsum_common7 │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── einsum_common7_lib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.internal │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── cudaDriverWrapper.cpp.o.d │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── nmsHelper.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ 
│ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ ├── einsum_common8 │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ ├── einsum_common8_lib.dir │ │ │ │ ├── DependInfo.cmake │ │ │ │ ├── build.make │ │ │ │ ├── checkMacrosPlugin.cpp.o.d │ │ │ │ ├── cmake_clean.cmake │ │ │ │ ├── cmake_clean_target.cmake │ │ │ │ ├── compiler_depend.make │ │ │ │ ├── compiler_depend.ts │ │ │ │ ├── depend.make │ │ │ │ ├── flags.make │ │ │ │ ├── getOptions.cpp.o.d │ │ │ │ ├── link.txt │ │ │ │ ├── logger.cpp.o.d │ │ │ │ ├── progress.make │ │ │ │ ├── reducedMathPlugin.cpp.o.d │ │ │ │ ├── sampleEngines.cpp.o.d │ │ │ │ ├── sampleInference.cpp.o.d │ │ │ │ ├── sampleOptions.cpp.o.d │ │ │ │ └── sampleReporting.cpp.o.d │ │ │ └── progress.marks │ │ ├── cmake_install.cmake │ │ └── kernels │ │ │ ├── CMakeFiles │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ └── progress.marks │ │ │ └── cmake_install.cmake │ └── test ├── einsum_common7 │ ├── BatchStream.h │ ├── CMakeLists.txt │ ├── EntropyCalibrator.h │ ├── ErrorRecorder.h │ ├── argsParser.h │ ├── bboxUtils.h │ ├── bertCommon.h │ ├── buffers.h │ ├── checkMacrosPlugin.cpp │ ├── checkMacrosPlugin.h │ ├── common.cuh │ ├── common.h │ ├── cub_helper.h │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── getOptions.cpp │ ├── getOptions.h │ ├── half.h │ ├── kernels │ │ ├── CMakeLists.txt │ │ ├── allClassNMS.cu │ │ ├── bboxDeltas2Proposals.cu │ │ ├── common.cu │ │ ├── cropAndResizeKernel.cu │ │ ├── decodeBBoxes.cu │ │ ├── detectionForward.cu │ │ ├── extractFgScores.cu │ │ ├── gatherTopDetections.cu │ │ ├── generateAnchors.cu │ │ ├── gridAnchorLayer.cu │ │ ├── kernel.cpp │ │ ├── kernel.h │ │ ├── lReLU.cu │ │ ├── maskRCNNKernels.cu │ │ ├── maskRCNNKernels.h │ │ ├── nmsLayer.cu │ │ ├── normalizeLayer.cu │ │ ├── permuteData.cu │ │ ├── priorBoxLayer.cu │ │ ├── proposalKernel.cu │ │ ├── proposalsForward.cu │ │ ├── reducedMathPlugin.h │ │ ├── regionForward.cu 
│ │ ├── reorgForward.cu │ │ ├── roiPooling.cu │ │ ├── rproiInferenceFused.cu │ │ ├── sortScoresPerClass.cu │ │ └── sortScoresPerImage.cu │ ├── logger.cpp │ ├── logger.h │ ├── logging.h │ ├── nmsHelper.cpp │ ├── nmsUtils.h │ ├── parserOnnxConfig.h │ ├── plugin.h │ ├── pluginLogger.h │ ├── pluginLogging.h │ ├── reducedMathPlugin.cpp │ ├── sampleConfig.h │ ├── sampleDevice.h │ ├── sampleEngines.cpp │ ├── sampleEngines.h │ ├── sampleInference.cpp │ ├── sampleInference.h │ ├── sampleOptions.cpp │ ├── sampleOptions.h │ ├── sampleReporting.cpp │ ├── sampleReporting.h │ ├── sampleUtils.h │ └── serialize.hpp └── einsum_common8 │ ├── BatchStream.h │ ├── CMakeLists.txt │ ├── EntropyCalibrator.h │ ├── ErrorRecorder.h │ ├── argsParser.h │ ├── bboxUtils.h │ ├── bertCommon.h │ ├── buffers.h │ ├── checkMacrosPlugin.cpp │ ├── checkMacrosPlugin.h │ ├── common.cuh │ ├── common.h │ ├── cub_helper.h │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── getOptions.cpp │ ├── getOptions.h │ ├── half.h │ ├── kernels │ ├── CMakeLists.txt │ ├── allClassNMS.cu │ ├── bboxDeltas2Proposals.cu │ ├── common.cu │ ├── cropAndResizeKernel.cu │ ├── decodeBBoxes.cu │ ├── detectionForward.cu │ ├── extractFgScores.cu │ ├── gatherTopDetections.cu │ ├── generateAnchors.cu │ ├── gridAnchorLayer.cu │ ├── kernel.cpp │ ├── kernel.h │ ├── lReLU.cu │ ├── maskRCNNKernels.cu │ ├── maskRCNNKernels.h │ ├── nmsLayer.cu │ ├── normalizeLayer.cu │ ├── permuteData.cu │ ├── priorBoxLayer.cu │ ├── proposalKernel.cu │ ├── proposalsForward.cu │ ├── reducedMathPlugin.h │ ├── regionForward.cu │ ├── reorgForward.cu │ ├── roiPooling.cu │ ├── rproiInferenceFused.cu │ ├── sortScoresPerClass.cu │ └── sortScoresPerImage.cu │ ├── logger.cpp │ ├── logger.h │ ├── logging.h │ ├── nmsHelper.cpp │ ├── nmsUtils.h │ ├── parserOnnxConfig.h │ ├── plugin.h │ ├── pluginLogger.h │ ├── pluginLogging.h │ ├── reducedMathPlugin.cpp │ ├── safeCommon.h │ ├── sampleConfig.h │ ├── sampleDevice.h │ ├── sampleEngines.cpp │ ├── sampleEngines.h │ ├── 
sampleInference.cpp │ ├── sampleInference.h │ ├── sampleOptions.cpp │ ├── sampleOptions.h │ ├── sampleReporting.cpp │ ├── sampleReporting.h │ ├── sampleUtils.h │ └── serialize.hpp ├── function ├── CMakeLists.txt ├── TrtInfer.cpp ├── function.cpp └── image.cpp ├── gcn.onnx ├── generate_onnx.py ├── include ├── Head.h ├── TrtInfer.h ├── function.h └── image.hpp ├── infer_test.cpp ├── mnist.cpp ├── mytest.cpp ├── onnx2trt.cpp ├── onnx2trt_gcn.cpp ├── resnet50.cpp ├── sample.cpp ├── temp.cpp └── testNet.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # This file is used to ignore files which are generated 2 | # ---------------------------------------------------------------------------- 3 | *bin/* 4 | *build/* 5 | /data/ 6 | !/data/*.jpg 7 | *~ 8 | *.autosave 9 | *.a 10 | *.core 11 | *.moc 12 | *.o 13 | *.obj 14 | *.orig 15 | *.rej 16 | *.so 17 | *.so.* 18 | *_pch.h.cpp 19 | *_resource.rc 20 | *.qm 21 | .#* 22 | *.*# 23 | core 24 | !core/ 25 | tags 26 | .DS_Store 27 | .directory 28 | *.debug 29 | Makefile* 30 | *.prl 31 | *.app 32 | moc_*.cpp 33 | ui_*.h 34 | qrc_*.cpp 35 | Thumbs.db 36 | *.res 37 | *.rc 38 | /.qmake.cache 39 | /.qmake.stash 40 | 41 | # qtcreator generated files 42 | *.pro.user* 43 | 44 | # xemacs temporary files 45 | *.flc 46 | 47 | # Vim temporary files 48 | .*.swp 49 | 50 | # Visual Studio generated files 51 | *.ib_pdb_index 52 | *.idb 53 | *.ilk 54 | *.pdb 55 | *.sln 56 | *.suo 57 | *.vcproj 58 | *vcproj.*.*.user 59 | *.ncb 60 | *.sdf 61 | *.opensdf 62 | *.vcxproj 63 | *vcxproj.* 64 | 65 | # MinGW generated files 66 | *.Debug 67 | *.Release 68 | 69 | # Python byte code 70 | *.pyc 71 | 72 | # Binaries 73 | # -------- 74 | *.dll 75 | *.exe 76 | 77 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 
欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "(gdb) 启动", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/bin/onnx2trt_gcn", 12 | "args": [], 13 | "stopAtEntry": false, 14 | "cwd": "${fileDirname}", 15 | "environment": [], 16 | "externalConsole": false, 17 | "MIMode": "gdb", 18 | "setupCommands": [ 19 | { 20 | "description": "为 gdb 启用整齐打印", 21 | "text": "-enable-pretty-printing", 22 | "ignoreFailures": true 23 | } 24 | ] 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(TensorRT) 4 | 5 | set(CMAKE_BUILD_TYPE Debug) 6 | #set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -o0 -Wall -g -ggdb") 7 | set(CMAKE_CXX_STANDARD 11) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | 10 | set(OpenCV_ROOT "/usr/local/opencv4.4.0/lib/cmake/opencv4") 11 | set(OpenCV_DIR "/usr/local/opencv4.4.0/lib/cmake/opencv4") 12 | find_package(OpenCV REQUIRED) 13 | INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) 14 | LINK_LIBRARIES(${OpenCV_LIBS}) 15 | 16 | add_subdirectory(einsum) 17 | LINK_LIBRARIES(einsumlib) 18 | 19 | INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}) # 设定h文件路径 20 | add_subdirectory(function function) 21 | LINK_LIBRARIES(functionlib) 22 | 23 | SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 24 | #add_executable(infer_test "infer_test.cpp") 25 | #add_executable(testNet "testNet.cpp") 26 | #add_executable(mnist "mnist.cpp") 27 | #add_executable(resnet50 "resnet50.cpp") 28 | #add_executable(SourceMnist "SourceMnist.cpp") 29 | #add_executable(onnx2trt "onnx2trt.cpp") 30 | add_executable(onnx2trt_gcn "onnx2trt_gcn.cpp") 31 | #add_executable(sample "sample.cpp") 32 | #add_executable(temp "sample.cpp") 33 | 34 | ######################################### 
35 | # test 36 | #add_executable(cublasTest "cublasTest.cpp") 37 | #target_link_libraries(cublasTest cublas) 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 前言 2 | 3 | 由于TensorRT并未实现Einsum插件,而在转换GCN的过程中,网络频繁使用该op,因此,不得已手写plugin,顺便也学习了一下plugin的编写及注册方法。本仓库也旨在演示如何自定义编写的plugin。 4 | 5 | 在plugin文件内,对每个成员函数的功能都进行了简单的注释(可能会有很多不清楚的地方,建议自行百度详细了解一下) 6 | 7 | 强烈推荐这个教程[实现TensorRT自定义插件(plugin)自由](https://zhuanlan.zhihu.com/p/297002406),按照这个教程肯定可以生成可以用的Plugin,目前网上搜到的都是直接重新编译生成新的`libnvinfer_plugin.so`替换官方原有的该库,然而这种方法在TensorRT-OSS版本不匹配TensorRT时就不能使用了,因此这里采用另一种更加灵活的方法:**直接将EinsumPlugin编译到自己的项目工程里即可**。 8 | 9 | 本仓库**只实现了`nctkv,kvw->nctw`操作**,其他结构也都类似,自行修改插件,制作适合自己版本的即可。 10 | 11 | ## 环境 12 | 13 | > TensorRT8.0 14 | 15 | TensorRT8.0相比以前版本的最大区别就是该版本的plugin必须在每个成员函数之后增加一个`throw()`,看`Einsum.cpp`程序就懂了。TensorRT7.0也可以使用,不需要修改任何代码。 16 | 17 | 如果想要使用TensorRT7,直接在`einsum/CMakeLists.txt`内修改TensorRT路径,并切换到`einsum_common7`即可 18 | 19 | ## 使用流程 20 | 21 | 参考根目录下的`CMakeLists.txt`将einsum添加到自己的项目之中即可。 22 | 23 | ==记得一定要在模型解析的源文件内(如这里的onnx2trt_gcn.cpp),使用`REGISTER_TENSORRT_PLUGIN(EinsumCreator)`来注册Einsum插件,这样解析时才可以找到== 24 | 25 | **测试用例使用方法** 26 | 27 | 1. 运行`generate_onnx.py`生成测试的onnx文件 28 | 2. 编译该仓库,然后运行`onnnx2trt_gcn`,如果没报错就说明该einsum插件没有问题 29 | 30 | ## 编写流程 31 | 32 | 1. 从TensorRT-OSS的plugin文件内,拷贝一个官方的例程,然后直接在里面的编写就可以了,替换掉对应函数的内容即可 33 | 2. 
关于自定义Plugin的依赖库问题,他主要依赖`TensorRT-OSS/plugin/common`和`TensorRT-(版本号)/samples/common`下的库,因此要将对应版本的`TensorRT-OSS和TensorRT`该路径下的文件全部复制到一个文件夹下(如本repo中的`einsum/einsum_common*`文件夹内),然后使用cmake编译生成对应的库,并将该库链接到`einsum.cpp`即可 34 | 35 | ## 细节 36 | 37 | 由于本仓库的示例中与EinsumPlugin的插件依赖库相同,因此采用`target_link_libraries(PUBLIC)`将库向上连接到示例程序`onnx2trt_gcn` 38 | 39 | 根目录下只会使用`onnx2trt_gcn.cpp`,其他的cpp文件不需要,可以删除。 -------------------------------------------------------------------------------- /SourceMnist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "NvInferRuntime.h" 3 | #include "NvInferRuntimeCommon.h" 4 | #include "NvOnnxConfig.h" 5 | #include "NvOnnxParser.h" 6 | #include "NvUtils.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace nvinfer1; 14 | 15 | class Logger : public ILogger 16 | { 17 | void log(Severity severity, const char* msg) override 18 | { 19 | // suppress info-level messages 20 | if (severity != Severity::kINFO) 21 | std::cout << msg << std::endl; 22 | } 23 | } gLogger; 24 | 25 | void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) 26 | { 27 | std::ifstream infile(fileName, std::ifstream::binary); 28 | std::string magic, h, w, max; 29 | infile >> magic >> h >> w >> max; 30 | infile.seekg(1, infile.cur); 31 | infile.read(reinterpret_cast(buffer), inH * inW); 32 | } 33 | 34 | struct InferDeleter 35 | { 36 | template 37 | void operator()(T* obj) const{ 38 | if (obj) 39 | { 40 | obj->destroy(); 41 | } 42 | } 43 | }; 44 | 45 | struct CudaDeleter 46 | { 47 | void operator()(void* obj){ 48 | if (obj) 49 | { 50 | cudaFree(obj); 51 | } 52 | } 53 | }; 54 | 55 | const char* onnxModelFile = "../data/mnist.onnx"; 56 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 57 | 58 | int main() 59 | { 60 | std::unique_ptr builder(createInferBuilder(gLogger)); 61 | builder->setMaxBatchSize(1); 62 | 63 | 
std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 64 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 65 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 66 | 67 | std::unique_ptr config(builder->createBuilderConfig()); 68 | config->setMaxWorkspaceSize(1 << 20); 69 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 70 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 71 | 72 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 73 | std::unique_ptr context(engine->createExecutionContext()); 74 | 75 | vector input_cpu_data, output_cpu_data; 76 | input_cpu_data.resize(28*28); 77 | output_cpu_data.resize(10); 78 | 79 | // Load pgm image 80 | std::vector fileData(28 * 28); 81 | readPGMFile("../data/9.pgm", fileData.data(), 28, 28); 82 | for(int i = 0; i < 28*28; ++i){ 83 | input_cpu_data[i] = 1.0 - (fileData[i]/255.0); 84 | std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % 28) ? "" : "\n"); 85 | } 86 | 87 | 88 | cudaStream_t stream; 89 | cudaStreamCreate(&stream); 90 | 91 | int size_input = sizeof(float)*28*28; 92 | int size_output = sizeof(float)*10; 93 | 94 | void *input_gpu_data_ptr, *output_gpu_data_ptr; 95 | std::unique_ptr input_gpu_data, output_gpu_data; 96 | cudaMalloc(&input_gpu_data_ptr, size_input); 97 | cudaMalloc(&output_gpu_data_ptr, size_output); 98 | input_gpu_data.reset(input_gpu_data_ptr); 99 | output_gpu_data.reset(output_gpu_data_ptr); 100 | 101 | void* buffers[2]; 102 | buffers[0] = input_gpu_data.get(); 103 | buffers[1] = output_gpu_data.get(); 104 | 105 | cudaMemcpyAsync(buffers[0], input_cpu_data.data(), size_input, cudaMemcpyHostToDevice, stream); 106 | 107 | bool is_success = context->executeV2(buffers); 108 | if(is_success) 109 | std::cout << "Forward success !" << std::endl; 110 | else 111 | std::cout << "Forward Error !" 
<< std::endl; 112 | 113 | cudaMemcpyAsync(output_cpu_data.data(), buffers[1], size_output, cudaMemcpyDeviceToHost, stream); 114 | cudaStreamSynchronize(stream); 115 | cudaStreamDestroy(stream); 116 | 117 | // softmax 118 | float sum{0.0f}; 119 | for (int i = 0; i < 10; i++){ 120 | output_cpu_data[i] = exp(output_cpu_data[i]); 121 | sum += output_cpu_data[i]; 122 | } 123 | 124 | // output 125 | for(int i = 0; i < 10; ++i){ 126 | output_cpu_data[i] /= sum; 127 | std::cout << i << ": " << std::string(floor(output_cpu_data[i] * 10 + 0.5f), '*') << "\n"; 128 | } 129 | 130 | return 1; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /cublasTest.cpp: -------------------------------------------------------------------------------- 1 | // CUDA runtime 库 + CUBLAS 库 2 | #include "cuda_runtime.h" 3 | #include "cublas_v2.h" 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | // 定义测试矩阵的维度 10 | int const A_ROW = 1; 11 | int const A_COL = 45; 12 | int const B_ROW = 45; 13 | int const B_COL = 15; 14 | 15 | int main() 16 | { 17 | // 定义状态变量 18 | cublasStatus_t status; 19 | float *h_A,*h_B,*h_C; //存储于内存中的矩阵 20 | h_A = (float*)malloc(sizeof(float)*A_ROW*A_COL); //在内存中开辟空间 21 | h_B = (float*)malloc(sizeof(float)*B_ROW*B_COL); 22 | h_C = (float*)malloc(sizeof(float)*A_ROW*B_COL); 23 | 24 | // 为待运算矩阵的元素赋予 0-10 范围内的随机数 25 | for (int i=0; i 24 | 25 | static void* test_func(void* data) 26 | { 27 | return data; 28 | } 29 | 30 | int main(void) 31 | { 32 | pthread_t thread; 33 | pthread_create(&thread, NULL, test_func, NULL); 34 | pthread_detach(thread); 35 | pthread_cancel(thread); 36 | pthread_join(thread, NULL); 37 | pthread_atfork(NULL, NULL, NULL); 38 | pthread_exit(NULL); 39 | 40 | return 0; 41 | } 42 | 43 | Determining if the function pthread_create exists in the pthreads failed with the following output: 44 | Change Dir: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp 45 | 46 | Run Build 
Command(s):/usr/bin/make -f Makefile cmTC_2b59d/fast && /usr/bin/make -f CMakeFiles/cmTC_2b59d.dir/build.make CMakeFiles/cmTC_2b59d.dir/build 47 | make[1]: 进入目录“/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp” 48 | Building C object CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o 49 | /bin/x86_64-linux-gnu-gcc-7 -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o -c /snap/cmake/888/share/cmake-3.20/Modules/CheckFunctionExists.c 50 | Linking C executable cmTC_2b59d 51 | /snap/cmake/888/bin/cmake -E cmake_link_script CMakeFiles/cmTC_2b59d.dir/link.txt --verbose=1 52 | /bin/x86_64-linux-gnu-gcc-7 -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create CMakeFiles/cmTC_2b59d.dir/CheckFunctionExists.c.o -o cmTC_2b59d -lpthreads 53 | /usr/bin/ld: 找不到 -lpthreads 54 | collect2: error: ld returned 1 exit status 55 | make[1]: *** [CMakeFiles/cmTC_2b59d.dir/build.make:99:cmTC_2b59d] 错误 1 56 | make[1]: 离开目录“/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/CMakeTmp” 57 | make: *** [Makefile:127:cmTC_2b59d/fast] 错误 2 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/2: -------------------------------------------------------------------------------- 1 | empty -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/5: -------------------------------------------------------------------------------- 1 | empty -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/Progress/count.txt: -------------------------------------------------------------------------------- 1 | 16 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | 
/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/rebuild_cache.dir 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/einsumlib.dir 3 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/CMakeFiles/edit_cache.dir 4 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/rebuild_cache.dir 5 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir 6 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/edit_cache.dir 7 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/CMakeFiles/rebuild_cache.dir 8 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/CMakeFiles/edit_cache.dir 9 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/einsum.dir/Einsum.cpp.o" "gcc" "CMakeFiles/einsum.dir/Einsum.cpp.o.d" 12 | ) 13 | 14 | # Targets to which this target links. 
15 | set(CMAKE_TARGET_LINKED_INFO_FILES 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/common/CMakeFiles/commonlib.dir/DependInfo.cmake" 17 | ) 18 | 19 | # Fortran module output directory. 20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 21 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum.dir/Einsum.cpp.o" 3 | "CMakeFiles/einsum.dir/Einsum.cpp.o.d" 4 | "libeinsum.a" 5 | "libeinsum.pdb" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/einsum.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/Data2/xzyLinuxInstallPackage/5TensorRT/TensorRT-7.2.1.6/include -I/usr/local/cuda/include -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common -isystem /usr/local/opencv4.4.0/include/opencv4 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum.a CMakeFiles/einsum.dir/Einsum.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum.a 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsum.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 12 2 | CMAKE_PROGRESS_2 = 13 3 | 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/einsumlib.dir/Einsum.cpp.o" "gcc" "CMakeFiles/einsumlib.dir/Einsum.cpp.o.d" 12 | ) 13 | 14 | # Targets to which this target links. 
15 | set(CMAKE_TARGET_LINKED_INFO_FILES 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake" 17 | ) 18 | 19 | # Fortran module output directory. 20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 21 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsumlib.dir/Einsum.cpp.o" 3 | "CMakeFiles/einsumlib.dir/Einsum.cpp.o.d" 4 | "libeinsumlib.a" 5 | "libeinsumlib.pdb" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/einsumlib.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsumlib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for einsumlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsumlib. 
3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsumlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsumlib.a CMakeFiles/einsumlib.dir/Einsum.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsumlib.a 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/einsumlib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 12 2 | CMAKE_PROGRESS_2 = 13 3 | 4 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 13 2 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/Einsum.cpp" "CMakeFiles/test.dir/Einsum.cpp.o" "gcc" "CMakeFiles/test.dir/Einsum.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/onnx2trt_gcn.cpp" "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o" "gcc" "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o.d" 13 | ) 14 | 15 | # Targets to which this target links. 16 | set(CMAKE_TARGET_LINKED_INFO_FILES 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake" 18 | ) 19 | 20 | # Fortran module output directory. 21 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 22 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/test.dir/Einsum.cpp.o" 3 | "CMakeFiles/test.dir/Einsum.cpp.o.d" 4 | "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o" 5 | "CMakeFiles/test.dir/onnx2trt_gcn.cpp.o.d" 6 | "test" 7 | "test.pdb" 8 | ) 9 | 10 | # Per-language clean rules from dependency scanning. 11 | foreach(lang CXX) 12 | include(CMakeFiles/test.dir/cmake_clean_${lang}.cmake OPTIONAL) 13 | endforeach() 14 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for test. 2 | # This may be replaced when dependencies are built. 
3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for test. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for test. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/CMakeFiles/test.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 14 2 | CMAKE_PROGRESS_2 = 15 3 | CMAKE_PROGRESS_3 = 16 4 | 5 | -------------------------------------------------------------------------------- /einsum/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set 
the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 
47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/cmake_install.cmake") 48 | endif() 49 | 50 | if(CMAKE_INSTALL_COMPONENT) 51 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 52 | else() 53 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 54 | endif() 55 | 56 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 57 | "${CMAKE_INSTALL_MANIFEST_FILES}") 58 | file(WRITE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/${CMAKE_INSTALL_MANIFEST}" 59 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 60 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/checkMacrosPlugin.cpp" "common/CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/cudaDriverWrapper.cpp" "common/CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/getOptions.cpp" "common/CMakeFiles/commonlib.dir/getOptions.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/logger.cpp" "common/CMakeFiles/commonlib.dir/logger.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/nmsHelper.cpp" "common/CMakeFiles/commonlib.dir/nmsHelper.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/reducedMathPlugin.cpp" "common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleEngines.cpp" "common/CMakeFiles/commonlib.dir/sampleEngines.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleEngines.cpp.o.d" 18 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleInference.cpp" "common/CMakeFiles/commonlib.dir/sampleInference.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleInference.cpp.o.d" 19 | 
"/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleOptions.cpp" "common/CMakeFiles/commonlib.dir/sampleOptions.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/sampleReporting.cpp" "common/CMakeFiles/commonlib.dir/sampleReporting.cpp.o" "gcc" "common/CMakeFiles/commonlib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/commonlib.dir/getOptions.cpp.o" 7 | "CMakeFiles/commonlib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/commonlib.dir/logger.cpp.o" 9 | "CMakeFiles/commonlib.dir/logger.cpp.o.d" 10 | "CMakeFiles/commonlib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/commonlib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d" 14 | "CMakeFiles/commonlib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/commonlib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/commonlib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/commonlib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/commonlib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/commonlib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/commonlib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/commonlib.dir/sampleReporting.cpp.o.d" 22 | "libcommonlib.a" 23 | "libcommonlib.pdb" 24 | ) 25 | 26 | # Per-language 
clean rules from dependency scanning. 27 | foreach(lang CXX) 28 | include(CMakeFiles/commonlib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libcommonlib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for commonlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for commonlib. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for commonlib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libcommonlib.a CMakeFiles/commonlib.dir/checkMacrosPlugin.cpp.o CMakeFiles/commonlib.dir/cudaDriverWrapper.cpp.o CMakeFiles/commonlib.dir/getOptions.cpp.o CMakeFiles/commonlib.dir/logger.cpp.o CMakeFiles/commonlib.dir/nmsHelper.cpp.o CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o CMakeFiles/commonlib.dir/sampleEngines.cpp.o CMakeFiles/commonlib.dir/sampleInference.cpp.o CMakeFiles/commonlib.dir/sampleOptions.cpp.o CMakeFiles/commonlib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libcommonlib.a 3 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | common/CMakeFiles/commonlib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | 
-------------------------------------------------------------------------------- /einsum/build/common/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/common/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 
47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/common/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/common/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/common/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/checkMacrosPlugin.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/cudaDriverWrapper.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/getOptions.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/logger.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/logger.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/nmsHelper.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o" "gcc" 
"einsum_common7/CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/reducedMathPlugin.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleEngines.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o.d" 18 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleInference.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o.d" 19 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleOptions.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/sampleReporting.cpp" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o" "gcc" "einsum_common7/CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 
28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o" 7 | "CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/einsum_common7_lib.dir/logger.cpp.o" 9 | "CMakeFiles/einsum_common7_lib.dir/logger.cpp.o.d" 10 | "CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d" 14 | "CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o.d" 22 | "libeinsum_common7_lib.a" 23 | "libeinsum_common7_lib.pdb" 24 | ) 25 | 26 | # Per-language clean rules from dependency scanning. 
27 | foreach(lang CXX) 28 | include(CMakeFiles/einsum_common7_lib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum_common7_lib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum_common7_lib. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum_common7_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/Data2/xzyLinuxInstallPackage/5TensorRT/TensorRT-7.2.1.6/include -I/usr/local/cuda/include -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/./einsum_common7 -isystem /usr/local/opencv4.4.0/include/opencv4 8 | 9 | CXX_FLAGS = -g -std=gnu++14 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum_common7_lib.a CMakeFiles/einsum_common7_lib.dir/checkMacrosPlugin.cpp.o CMakeFiles/einsum_common7_lib.dir/cudaDriverWrapper.cpp.o CMakeFiles/einsum_common7_lib.dir/getOptions.cpp.o CMakeFiles/einsum_common7_lib.dir/logger.cpp.o CMakeFiles/einsum_common7_lib.dir/nmsHelper.cpp.o CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleEngines.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleInference.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleOptions.cpp.o CMakeFiles/einsum_common7_lib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum_common7_lib.a 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- 
/einsum/build/einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | einsum_common7/CMakeFiles/einsum_common7_lib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common7/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common7/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common7/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/checkMacrosPlugin.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o.d" 12 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/cudaDriverWrapper.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o.d" 13 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/getOptions.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o.d" 14 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/logger.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/logger.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/logger.cpp.o.d" 15 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/nmsHelper.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o.d" 16 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/reducedMathPlugin.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d" 17 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleEngines.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o.d" 18 
| "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleInference.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o.d" 19 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleOptions.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o.d" 20 | "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/sampleReporting.cpp" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o" "gcc" "einsum_common8/CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o.d" 21 | ) 22 | 23 | # Targets to which this target links. 24 | set(CMAKE_TARGET_LINKED_INFO_FILES 25 | ) 26 | 27 | # Fortran module output directory. 28 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 29 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o" 3 | "CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o.d" 4 | "CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o" 5 | "CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o.d" 6 | "CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o" 7 | "CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o.d" 8 | "CMakeFiles/einsum_common8_lib.dir/logger.cpp.o" 9 | "CMakeFiles/einsum_common8_lib.dir/logger.cpp.o.d" 10 | "CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o" 11 | "CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o.d" 12 | "CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o" 13 | "CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d" 14 | 
"CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o" 15 | "CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o.d" 16 | "CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o" 17 | "CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o.d" 18 | "CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o" 19 | "CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o.d" 20 | "CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o" 21 | "CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o.d" 22 | "libeinsum_common8_lib.a" 23 | "libeinsum_common8_lib.pdb" 24 | ) 25 | 26 | # Per-language clean rules from dependency scanning. 27 | foreach(lang CXX) 28 | include(CMakeFiles/einsum_common8_lib.dir/cmake_clean_${lang}.cmake OPTIONAL) 29 | endforeach() 30 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/cmake_clean_target.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "libeinsum_common8_lib.a" 3 | ) 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for einsum_common8_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for einsum_common8_lib. 
3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for einsum_common8_lib. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # compile CXX with /bin/x86_64-linux-gnu-g++-7 5 | CXX_DEFINES = 6 | 7 | CXX_INCLUDES = -I/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 8 | 9 | CXX_FLAGS = -g -std=gnu++11 10 | 11 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/link.txt: -------------------------------------------------------------------------------- 1 | /bin/x86_64-linux-gnu-ar qc libeinsum_common8_lib.a CMakeFiles/einsum_common8_lib.dir/checkMacrosPlugin.cpp.o CMakeFiles/einsum_common8_lib.dir/cudaDriverWrapper.cpp.o CMakeFiles/einsum_common8_lib.dir/getOptions.cpp.o CMakeFiles/einsum_common8_lib.dir/logger.cpp.o CMakeFiles/einsum_common8_lib.dir/nmsHelper.cpp.o CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleEngines.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleInference.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleOptions.cpp.o CMakeFiles/einsum_common8_lib.dir/sampleReporting.cpp.o 2 | /bin/x86_64-linux-gnu-ranlib libeinsum_common8_lib.a 3 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/progress.make: 
-------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | CMAKE_PROGRESS_4 = 4 5 | CMAKE_PROGRESS_5 = 5 6 | CMAKE_PROGRESS_6 = 6 7 | CMAKE_PROGRESS_7 = 7 8 | CMAKE_PROGRESS_8 = 8 9 | CMAKE_PROGRESS_9 = 9 10 | CMAKE_PROGRESS_10 = 10 11 | CMAKE_PROGRESS_11 = 11 12 | 13 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o.d: -------------------------------------------------------------------------------- 1 | einsum_common8/CMakeFiles/einsum_common8_lib.dir/reducedMathPlugin.cpp.o: \ 2 | /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/reducedMathPlugin.cpp \ 3 | /usr/include/stdc-predef.h 4 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | if(NOT CMAKE_INSTALL_LOCAL_ONLY) 46 | # Include the install script for the subdirectory. 47 | include("/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build/einsum_common8/kernels/cmake_install.cmake") 48 | endif() 49 | 50 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.20 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/build") 7 | 8 | # Force unix paths in dependencies. 
9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /einsum/build/einsum_common8/kernels/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/xzy/G/DeepLearning/Gitee/TensorRT/CPP/TensorRT/einsum/einsum_common8/kernels 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Debug") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/bin/x86_64-linux-gnu-objdump") 43 | endif() 44 | 45 | -------------------------------------------------------------------------------- /einsum/build/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/einsum/build/test -------------------------------------------------------------------------------- /einsum/einsum_common7/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kernels) 2 | AUX_SOURCE_DIRECTORY(./ DIR_LIB_SRCS) 3 | #SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) 4 | add_library(einsum_common_lib ${DIR_LIB_SRCS}) 5 | -------------------------------------------------------------------------------- /einsum/einsum_common7/EntropyCalibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef ENTROPY_CALIBRATOR_H 18 | #define ENTROPY_CALIBRATOR_H 19 | 20 | #include "BatchStream.h" 21 | #include "NvInfer.h" 22 | 23 | //! \class EntropyCalibratorImpl 24 | //! 25 | //! \brief Implements common functionality for Entropy calibrators. 26 | //! 27 | template 28 | class EntropyCalibratorImpl 29 | { 30 | public: 31 | EntropyCalibratorImpl( 32 | TBatchStream stream, int firstBatch, std::string networkName, const char* inputBlobName, bool readCache = true) 33 | : mStream{stream} 34 | , mCalibrationTableName("CalibrationTable" + networkName) 35 | , mInputBlobName(inputBlobName) 36 | , mReadCache(readCache) 37 | { 38 | nvinfer1::Dims dims = mStream.getDims(); 39 | mInputCount = samplesCommon::volume(dims); 40 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 41 | mStream.reset(firstBatch); 42 | } 43 | 44 | virtual ~EntropyCalibratorImpl() 45 | { 46 | CHECK(cudaFree(mDeviceInput)); 47 | } 48 | 49 | int getBatchSize() const 50 | { 51 | return mStream.getBatchSize(); 52 | } 53 | 54 | bool getBatch(void* bindings[], const char* names[], int nbBindings) 55 | { 56 | if (!mStream.next()) 57 | { 58 | return false; 59 | } 60 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 61 | assert(!strcmp(names[0], mInputBlobName)); 62 | bindings[0] = mDeviceInput; 63 | return true; 64 | } 65 | 66 | const void* readCalibrationCache(size_t& length) 67 | { 68 | mCalibrationCache.clear(); 69 | std::ifstream input(mCalibrationTableName, std::ios::binary); 70 | input >> std::noskipws; 71 | if (mReadCache && input.good()) 72 | { 73 | std::copy(std::istream_iterator(input), std::istream_iterator(), 74 | std::back_inserter(mCalibrationCache)); 75 | } 76 | length = mCalibrationCache.size(); 77 | return length ? 
mCalibrationCache.data() : nullptr; 78 | } 79 | 80 | void writeCalibrationCache(const void* cache, size_t length) 81 | { 82 | std::ofstream output(mCalibrationTableName, std::ios::binary); 83 | output.write(reinterpret_cast(cache), length); 84 | } 85 | 86 | private: 87 | TBatchStream mStream; 88 | size_t mInputCount; 89 | std::string mCalibrationTableName; 90 | const char* mInputBlobName; 91 | bool mReadCache{true}; 92 | void* mDeviceInput{nullptr}; 93 | std::vector mCalibrationCache; 94 | }; 95 | 96 | //! \class Int8EntropyCalibrator2 97 | //! 98 | //! \brief Implements Entropy calibrator 2. 99 | //! CalibrationAlgoType is kENTROPY_CALIBRATION_2. 100 | //! 101 | template 102 | class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 103 | { 104 | public: 105 | Int8EntropyCalibrator2( 106 | TBatchStream stream, int firstBatch, const char* networkName, const char* inputBlobName, bool readCache = true) 107 | : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) 108 | { 109 | } 110 | 111 | int getBatchSize() const override 112 | { 113 | return mImpl.getBatchSize(); 114 | } 115 | 116 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 117 | { 118 | return mImpl.getBatch(bindings, names, nbBindings); 119 | } 120 | 121 | const void* readCalibrationCache(size_t& length) override 122 | { 123 | return mImpl.readCalibrationCache(length); 124 | } 125 | 126 | void writeCalibrationCache(const void* cache, size_t length) override 127 | { 128 | mImpl.writeCalibrationCache(cache, length); 129 | } 130 | 131 | private: 132 | EntropyCalibratorImpl mImpl; 133 | }; 134 | 135 | #endif // ENTROPY_CALIBRATOR_H 136 | -------------------------------------------------------------------------------- /einsum/einsum_common7/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_BBOX_UTILS_H 18 | #define TRT_BBOX_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | template 26 | struct Bbox 27 | { 28 | T xmin, ymin, xmax, ymax; 29 | Bbox(T xmin, T ymin, T xmax, T ymax) 30 | : xmin(xmin) 31 | , ymin(ymin) 32 | , xmax(xmax) 33 | , ymax(ymax) 34 | { 35 | } 36 | Bbox() = default; 37 | }; 38 | 39 | template 40 | struct BboxInfo 41 | { 42 | T conf_score; 43 | int label; 44 | int bbox_idx; 45 | bool kept; 46 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 47 | : conf_score(conf_score) 48 | , label(label) 49 | , bbox_idx(bbox_idx) 50 | , kept(kept) 51 | { 52 | } 53 | BboxInfo() = default; 54 | }; 55 | 56 | template 57 | bool operator<(const Bbox& lhs, const Bbox& rhs) 58 | { 59 | return lhs.x1 < rhs.x1; 60 | } 61 | 62 | template 63 | bool operator==(const Bbox& lhs, const Bbox& rhs) 64 | { 65 | return lhs.x1 == rhs.x1 && lhs.y1 == rhs.y1 && lhs.x2 == rhs.x2 && lhs.y2 == rhs.y2; 66 | } 67 | // }}} 68 | 69 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 70 | 71 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 72 | 73 | size_t dataTypeSize(DataType dtype); 74 | 75 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 76 | 77 | #endif 78 | 
-------------------------------------------------------------------------------- /einsum/einsum_common7/checkMacrosPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "checkMacrosPlugin.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvinfer1 23 | { 24 | namespace plugin 25 | { 26 | 27 | // This will be populated by the logger supplied by the user to initLibNvInferPlugins() 28 | ILogger* gLogger{}; 29 | 30 | template 31 | int LogStream::Buf::sync() 32 | { 33 | std::string s = str(); 34 | while (!s.empty() && s.back() == '\n') 35 | { 36 | s.pop_back(); 37 | } 38 | if (gLogger != nullptr) 39 | { 40 | gLogger->log(kSeverity, s.c_str()); 41 | } 42 | str(""); 43 | return 0; 44 | } 45 | 46 | // These use gLogger, and therefore require initLibNvInferPlugins() to be called with a logger 47 | // (otherwise, it will not log) 48 | LogStream gLogError; 49 | LogStream gLogWarning; 50 | LogStream gLogInfo; 51 | LogStream gLogVerbose; 52 | 53 | // break-pointable 54 | void throwCudaError(const char* file, const char* function, int line, int status, const char* msg) 55 | { 56 | CudaError error(file, function, line, status, msg); 57 | error.log(gLogError); 58 | throw error; 59 | } 60 | 61 | // break-pointable 62 | void throwCublasError(const 
char* file, const char* function, int line, int status, const char* msg) 63 | { 64 | if (msg == nullptr) 65 | { 66 | auto s_ = static_cast(status); 67 | switch (s_) 68 | { 69 | case CUBLAS_STATUS_SUCCESS: msg = "CUBLAS_STATUS_SUCCESS"; break; 70 | case CUBLAS_STATUS_NOT_INITIALIZED: msg = "CUBLAS_STATUS_NOT_INITIALIZED"; break; 71 | case CUBLAS_STATUS_ALLOC_FAILED: msg = "CUBLAS_STATUS_ALLOC_FAILED"; break; 72 | case CUBLAS_STATUS_INVALID_VALUE: msg = "CUBLAS_STATUS_INVALID_VALUE"; break; 73 | case CUBLAS_STATUS_ARCH_MISMATCH: msg = "CUBLAS_STATUS_ARCH_MISMATCH"; break; 74 | case CUBLAS_STATUS_MAPPING_ERROR: msg = "CUBLAS_STATUS_MAPPING_ERROR"; break; 75 | case CUBLAS_STATUS_EXECUTION_FAILED: msg = "CUBLAS_STATUS_EXECUTION_FAILED"; break; 76 | case CUBLAS_STATUS_INTERNAL_ERROR: msg = "CUBLAS_STATUS_INTERNAL_ERROR"; break; 77 | case CUBLAS_STATUS_NOT_SUPPORTED: msg = "CUBLAS_STATUS_NOT_SUPPORTED"; break; 78 | case CUBLAS_STATUS_LICENSE_ERROR: msg = "CUBLAS_STATUS_LICENSE_ERROR"; break; 79 | } 80 | } 81 | CublasError error(file, function, line, status, msg); 82 | error.log(gLogError); 83 | throw error; 84 | } 85 | 86 | // break-pointable 87 | void throwCudnnError(const char* file, const char* function, int line, int status, const char* msg) 88 | { 89 | CudnnError error(file, function, line, status, msg); 90 | error.log(gLogError); 91 | throw error; 92 | } 93 | 94 | void logError(const char* msg, const char* file, const char* fn, int line) 95 | { 96 | gLogError << "Parameter check failed at: " << file << "::" << fn << "::" << line; 97 | gLogError << ", condition: " << msg << std::endl; 98 | } 99 | 100 | // break-pointable 101 | void reportAssertion(const char* msg, const char* file, int line) 102 | { 103 | std::ostringstream stream; 104 | stream << "Assertion failed: " << msg << std::endl 105 | << file << ':' << line << std::endl 106 | << "Aborting..." 
<< std::endl; 107 | getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); 108 | cudaDeviceReset(); 109 | abort(); 110 | } 111 | 112 | void TRTException::log(std::ostream& logStream) const 113 | { 114 | logStream << file << " (" << line << ") - " << name << " Error in " << function << ": " << status; 115 | if (message != nullptr) 116 | { 117 | logStream << " (" << message << ")"; 118 | } 119 | logStream << std::endl; 120 | } 121 | 122 | } // namespace plugin 123 | 124 | } // namespace nvinfer1 125 | -------------------------------------------------------------------------------- /einsum/einsum_common7/cub_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | template 19 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments) 20 | { 21 | size_t temp_storage_bytes = 0; 22 | cub::DeviceSegmentedRadixSort::SortPairsDescending((void*) NULL, temp_storage_bytes, (const KeyT*) NULL, 23 | (KeyT*) NULL, (const ValueT*) NULL, (ValueT*) NULL, 24 | num_items, // # items 25 | num_segments, // # segments 26 | (const int*) NULL, (const int*) NULL); 27 | return temp_storage_bytes; 28 | } 29 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | file(GLOB SRCS *.cpp) 17 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS}) 18 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE) 19 | file(GLOB CU_SRCS *.cu) 20 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS}) 21 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE) 22 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/extractFgScores.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "kernel.h" 17 | 18 | template 19 | pluginStatus_t extractFgScores_gpu(cudaStream_t stream, 20 | int N, 21 | int A, 22 | int H, 23 | int W, 24 | const void* scores, 25 | void* fgScores) 26 | { 27 | // Copy all the objectness scores for one batch 28 | size_t size = A * H * W * sizeof(T); 29 | for (int n = 0; n < N; n++) 30 | { 31 | // Find out the starting pointer of the objectness scores in the input 32 | size_t offset_ld = (n * 2 + 1) * A * H * W; 33 | // Find out the starting pointer of the objectness scores in the output 34 | size_t offset_st = n * A * H * W; 35 | CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); 36 | } 37 | 38 | return STATUS_SUCCESS; 39 | } 40 | 41 | template 42 | pluginStatus_t extractFgScores_cpu(int N, 43 | int A, 44 | int H, 45 | int W, 46 | const void* scores, 47 | void* fgScores) 48 | { 49 | size_t size = A * H * W * sizeof(T); 50 | for (int n = 0; n < N; n++) 51 | { 52 | size_t offset_ld = (n * 2 + 1) * A * H * W; 53 | size_t offset_st = n * A * H * W; 54 | memcpy(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size); 55 | } 56 | return STATUS_SUCCESS; 57 | } 58 | 59 | pluginStatus_t extractFgScores(cudaStream_t stream, 60 | const int N, 61 | const int A, 62 | const int H, 63 | const int W, 64 | const DataType t_scores, 65 | const DLayout_t 
l_scores, 66 | const void* scores, 67 | const DataType t_fgScores, 68 | const DLayout_t l_fgScores, 69 | void* fgScores) 70 | { 71 | if (l_fgScores != NCHW || l_scores != NCHW) 72 | return STATUS_BAD_PARAM; 73 | 74 | if (t_fgScores != DataType::kFLOAT) 75 | return STATUS_BAD_PARAM; 76 | 77 | if (t_scores != DataType::kFLOAT) 78 | return STATUS_BAD_PARAM; 79 | 80 | return extractFgScores_gpu(stream, N, A, H, W, scores, fgScores); 81 | } 82 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/generateAnchors.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "kernel.h" 17 | #include 18 | 19 | pluginStatus_t generateAnchors_cpu(int numRatios, 20 | float* ratios, 21 | int numScales, 22 | float* scales, 23 | int baseSize, 24 | float* anchors) 25 | { 26 | #ifdef DEBUG 27 | DEBUG_PRINTF("Generating Anchors with:\n"); 28 | DEBUG_PRINTF("Scales:"); 29 | for (int s = 0; s < numScales; ++s) 30 | { 31 | DEBUG_PRINTF("%f\t", scales[s]); 32 | } 33 | DEBUG_PRINTF("\n"); 34 | DEBUG_PRINTF("Ratios:"); 35 | for (int r = 0; r < numRatios; ++r) 36 | { 37 | DEBUG_PRINTF("%f\t", ratios[r]); 38 | } 39 | DEBUG_PRINTF("\n"); 40 | #endif 41 | 42 | if ((numScales <= 0) || (numRatios <= 0) || (baseSize <= 0)) 43 | { 44 | return STATUS_BAD_PARAM; 45 | } 46 | 47 | // Generate parameters for numRatios * numScales general anchor boxes 48 | for (int r = 0; r < numRatios; ++r) 49 | { 50 | for (int s = 0; s < numScales; ++s) 51 | { 52 | int id = r * numScales + s; 53 | float scale = scales[s]; 54 | float ratio = ratios[r]; 55 | float bs = baseSize; 56 | float ws = round(sqrt((float) (bs * bs) / ratio)); 57 | float hs = round(ws * ratio); 58 | // Width: bs / sqrt(ratio) * scale 59 | // Height: bs * sqrt(ratio) * scale 60 | ws *= scale; 61 | hs *= scale; 62 | 63 | // x_anchor_ctr 64 | /* 65 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 66 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 67 | */ 68 | anchors[id * 4] = (bs - 1) / 2; 69 | // y_anchor_ctr 70 | /* 71 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 72 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 
73 | */ 74 | anchors[id * 4 + 1] = (bs - 1) / 2; 75 | // w_anchor 76 | anchors[id * 4 + 2] = ws; 77 | // h_anchor 78 | anchors[id * 4 + 3] = hs; 79 | } 80 | } 81 | return STATUS_SUCCESS; 82 | } 83 | 84 | pluginStatus_t generateAnchors(cudaStream_t stream, 85 | int numRatios, 86 | float* ratios, 87 | int numScales, 88 | float* scales, 89 | int baseSize, 90 | float* anchors) 91 | { 92 | // Each anchor box has 4 parameters 93 | int ac = numRatios * numScales * 4; 94 | float* anchors_cpu; 95 | cudaMallocHost((void**) &anchors_cpu, sizeof(float) * ac); 96 | pluginStatus_t status = generateAnchors_cpu(numRatios, ratios, numScales, scales, baseSize, anchors_cpu); 97 | cudaMemcpyAsync(anchors, anchors_cpu, sizeof(float) * ac, cudaMemcpyHostToDevice, stream); 98 | cudaFreeHost(anchors_cpu); 99 | return status; 100 | } 101 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | #include "plugin.h" 19 | 20 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 21 | int topK, DataType DT_BBOX, DataType DT_SCORE) 22 | { 23 | size_t wss[7]; 24 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 25 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 26 | wss[2] = detectionForwardPreNMSSize(N, C2); 27 | wss[3] = detectionForwardPreNMSSize(N, C2); 28 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 29 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 30 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 31 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 32 | return calculateTotalWorkspaceSize(wss, 7); 33 | } 34 | 35 | namespace nvinfer1 36 | { 37 | namespace plugin 38 | { 39 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 40 | int topK, DataType DT_BBOX, DataType DT_SCORE) 41 | { 42 | size_t wss[7]; 43 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 44 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 45 | wss[2] = detectionForwardPreNMSSize(N, C2); 46 | wss[3] = detectionForwardPreNMSSize(N, C2); 47 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 48 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 49 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 50 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 51 | return calculateTotalWorkspaceSize(wss, 7); 52 | } 53 | } // namespace plugin 54 | } // namespace nvinfer1 55 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/lReLU.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "kernel.h" 18 | 19 | template 20 | __launch_bounds__(nthdsPerCTA) __global__ 21 | void pReLUKernel(const int n, const float negativeSlope, const float* input, float* output) 22 | { 23 | for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA) 24 | { 25 | output[i] = input[i] > 0 ? input[i] : input[i] * negativeSlope; 26 | } 27 | } 28 | 29 | pluginStatus_t lReLUGPU(cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 30 | { 31 | const int BS = 512; 32 | const int GS = (n + BS - 1) / BS; 33 | pReLUKernel<<>>(n, negativeSlope, 34 | (const float*) input, 35 | (float*) output); 36 | return STATUS_SUCCESS; 37 | } 38 | 39 | pluginStatus_t lReLUInference( 40 | cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 41 | { 42 | return lReLUGPU(stream, n, negativeSlope, (const float*) input, (float*) output); 43 | } 44 | -------------------------------------------------------------------------------- /einsum/einsum_common7/kernels/reducedMathPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _REDUCED_MATH_PLUGIN_H
#define _REDUCED_MATH_PLUGIN_H
// Dynamically strength-reduced div and mod
//
// Ideas taken from Sean Baxter's MGPU library.
// These classes provide for reduced complexity division and modulus
// on integers, for the case where the same divisor or modulus will
// be used repeatedly.

namespace nvinfer1
{
namespace plugin
{
namespace detail
{

// Precomputes (mul_coeff, shift_coeff) so that x / denom can later be
// evaluated as (umulhi(x, mul_coeff) >> shift_coeff). Defined in
// reducedMathPlugin.cpp.
void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff);

// Upper 32 bits of the 64-bit product x * y. Uses the hardware __umulhi
// intrinsic in device code, and a 64-bit multiply on the host.
// NOTE(review): the arch guard ">= 100" admits every compute capability;
// confirm whether a higher floor was intended.
__host__ __device__ __forceinline__ unsigned int umulhi(unsigned int x, unsigned int y)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 100
    return __umulhi(x, y);
#else
    unsigned long long z = (unsigned long long) x * (unsigned long long) y;
    return (unsigned int) (z >> 32);
#endif
}

// This is a weird implementation that returns div_up(0,1)=0 but
// div_up(0,2)=1 (wrong) -- just do not use it with a=0.
__host__ __device__ inline int div_up(int a, int b)
{
    return (a - 1) / b + 1;
}

} // end namespace detail

// Wraps a fixed divisor y together with precomputed coefficients so that
// div/mod by y reduce to a multiply-high and a shift (fast on GPU).
class reduced_divisor
{
public:
    reduced_divisor() {}
    // Host-only: runs find_divisor to derive the coefficients for _y.
    __host__ __forceinline__ reduced_divisor(int _y)
        : y(_y)
    {
        detail::find_divisor(y, mul_coeff, shift_coeff);
    }
    // Rehydrates from previously computed coefficients (usable in device code).
    __host__ __device__ __forceinline__ reduced_divisor(unsigned _mul_coeff, unsigned _shift_coeff, int _y)
        : mul_coeff(_mul_coeff)
        , shift_coeff(_shift_coeff)
        , y(_y)
    {
    }
    __host__ __device__ __forceinline__ int div(int x) const
    {
        // if dividing by 1, then find_divisor wouldn't have worked because
        // mul_coeff would have had to be 2^32, which can't be represented,
        // so we have to special case that one.
        return (y != 1) ? detail::umulhi((unsigned int) x, mul_coeff) >> shift_coeff : x;
    }
    __host__ __device__ __forceinline__ int mod(int x) const
    {
        return x - (div(x) * y);
    }
    // Computes quotient and remainder in one pass.
    __host__ __device__ __forceinline__ void divmod(int x, int& q, int& mod) const
    {
        q = div(x);
        mod = x - (q * y);
    }
    // Returns the original divisor.
    __host__ __device__ __forceinline__ int get() const
    {
        return y;
    }
    inline __host__ void get_mul_shift(unsigned& mul, unsigned& shift)
    {
        mul = mul_coeff;
        shift = shift_coeff;
    }

protected:
    unsigned int mul_coeff;
    unsigned int shift_coeff;
    int y;
};

} // namespace plugin

} // namespace nvinfer1
#endif /*_REDUCED_MATH_PLUGIN_H*/
// ---------------------------------------------------------------------------
// einsum/einsum_common7/kernels/reorgForward.cu
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "reducedMathPlugin.h" 17 | #include "kernel.h" 18 | 19 | using namespace nvinfer1::plugin; // for reduced_divisor 20 | 21 | template 22 | __launch_bounds__(nthdsPerCTA) 23 | __global__ void reorgKernel( 24 | const float* input, // input tensor of shape (batch, C, H, W) 25 | const int volume, // note that volumes of input and output tensors are the same 26 | reduced_divisor batch, 27 | reduced_divisor C, 28 | reduced_divisor H, 29 | reduced_divisor W, 30 | reduced_divisor C_out, 31 | reduced_divisor stride, 32 | float* output) // output tensor of shape (batch, C * stride * stride, H / stride, W / stride) 33 | { 34 | /* 35 | * Reference 36 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas_kernels.cu#L370 37 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas.c#L9 38 | */ 39 | 40 | // outIndex is row-major position of input coordinates 41 | for (int outIndex = blockIdx.x * nthdsPerCTA + threadIdx.x; outIndex < volume; outIndex += nthdsPerCTA) 42 | { 43 | int i = outIndex; 44 | 45 | // calculate output coordinates from outIndex 46 | int outW, outH, outC; 47 | W.divmod(i, i, outW); 48 | H.divmod(i, i, outH); 49 | C.divmod(i, i, outC); 50 | int outN = i; 51 | 52 | // calculate input coordinates based on output coordinates 53 | // offset is [0, 1, ..., stride * stride - 1] = posH 
* stride + posW 54 | int offset, inC, posH, posW; 55 | C_out.divmod(outC, offset, inC); 56 | stride.divmod(offset, posH, posW); 57 | int inH = outH * stride.get() + posH; 58 | int inW = outW * stride.get() + posW; 59 | int inN = outN; 60 | 61 | // inIndex is row-major position of input coordinates 62 | int inIndex = inW + W.get() * stride.get() * (inH + H.get() * stride.get() * (inC + C_out.get() * inN)); 63 | 64 | output[outIndex] = input[inIndex]; 65 | } 66 | } 67 | 68 | pluginStatus_t reorgGPU( 69 | cudaStream_t stream, 70 | const int batch, 71 | const int C, 72 | const int H, 73 | const int W, 74 | const int stride, 75 | const float* input, 76 | float* output) 77 | { 78 | const int BS = 512; // number of threads in one block 79 | const int volume = batch * C * H * W; // size of input tensor 80 | const int GS = (volume + BS - 1) / BS; // number of blocks to launch, calculated so global number of threads is >= volume 81 | 82 | reduced_divisor C_out(C / (stride * stride)); 83 | reorgKernel<<>>(input, volume, reduced_divisor(batch), reduced_divisor(C), reduced_divisor(H), reduced_divisor(W), C_out, reduced_divisor(stride), output); 84 | return STATUS_SUCCESS; 85 | } 86 | 87 | pluginStatus_t reorgInference( 88 | cudaStream_t stream, 89 | const int batch, 90 | const int C, 91 | const int H, 92 | const int W, 93 | const int stride, 94 | const void* input, 95 | void* output) 96 | { 97 | return reorgGPU(stream, batch, C, H, W, stride, (const float*) input, (float*) output); 98 | } 99 | -------------------------------------------------------------------------------- /einsum/einsum_common7/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "logger.h" 18 | #include "logging.h" 19 | 20 | namespace sample 21 | { 22 | Logger gLogger{Logger::Severity::kINFO}; 23 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 24 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 25 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 26 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 27 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 28 | 29 | void setReportableSeverity(Logger::Severity severity) 30 | { 31 | gLogger.setReportableSeverity(severity); 32 | gLogVerbose.setReportableSeverity(severity); 33 | gLogInfo.setReportableSeverity(severity); 34 | gLogWarning.setReportableSeverity(severity); 35 | gLogError.setReportableSeverity(severity); 36 | gLogFatal.setReportableSeverity(severity); 37 | } 38 | } // namespace sample 39 | -------------------------------------------------------------------------------- /einsum/einsum_common7/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef LOGGER_H 18 | #define LOGGER_H 19 | 20 | #include "logging.h" 21 | 22 | namespace sample 23 | { 24 | extern Logger gLogger; 25 | extern LogStreamConsumer gLogVerbose; 26 | extern LogStreamConsumer gLogInfo; 27 | extern LogStreamConsumer gLogWarning; 28 | extern LogStreamConsumer gLogError; 29 | extern LogStreamConsumer gLogFatal; 30 | 31 | void setReportableSeverity(Logger::Severity severity); 32 | } // namespace sample 33 | 34 | #endif // LOGGER_H 35 | -------------------------------------------------------------------------------- /einsum/einsum_common7/nmsHelper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "plugin.h" 18 | #include "cuda_fp16.h" 19 | #include 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX) 25 | { 26 | if (DT_BBOX == DataType::kFLOAT) 27 | { 28 | return N * C1 * sizeof(float); 29 | } 30 | if (DT_BBOX == DataType::kHALF) 31 | { 32 | return N * C1 * sizeof(__half); 33 | } 34 | 35 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 36 | return (size_t) -1; 37 | } 38 | 39 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX) 40 | { 41 | if (DT_BBOX == DataType::kFLOAT) 42 | { 43 | return shareLocation ? 0 : N * C1 * sizeof(float); 44 | } 45 | if (DT_BBOX == DataType::kHALF) 46 | { 47 | return shareLocation ? 0 : N * C1 * sizeof(__half); 48 | } 49 | 50 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 51 | return (size_t) -1; 52 | } 53 | 54 | size_t detectionForwardPreNMSSize(int N, int C2) 55 | { 56 | ASSERT(sizeof(float) == sizeof(int)); 57 | return N * C2 * sizeof(float); 58 | } 59 | 60 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK) 61 | { 62 | ASSERT(sizeof(float) == sizeof(int)); 63 | return N * numClasses * topK * sizeof(float); 64 | } 65 | -------------------------------------------------------------------------------- /einsum/einsum_common7/nmsUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_NMS_UTILS_H 18 | #define TRT_NMS_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 26 | int topK, DataType DT_BBOX, DataType DT_SCORE); 27 | #endif 28 | -------------------------------------------------------------------------------- /einsum/einsum_common7/parserOnnxConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef PARSER_ONNX_CONFIG_H 18 | #define PARSER_ONNX_CONFIG_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "NvInfer.h" 25 | #include "NvOnnxConfig.h" 26 | #include "NvOnnxParser.h" 27 | 28 | #define ONNX_DEBUG 1 29 | 30 | /** 31 | * \class ParserOnnxConfig 32 | * \brief Configuration Manager Class Concrete Implementation 33 | * 34 | * \note: 35 | * 36 | */ 37 | 38 | using namespace std; 39 | 40 | class ParserOnnxConfig : public nvonnxparser::IOnnxConfig 41 | { 42 | 43 | protected: 44 | string mModelFilename{}; 45 | string mTextFilename{}; 46 | string mFullTextFilename{}; 47 | nvinfer1::DataType mModelDtype; 48 | nvonnxparser::IOnnxConfig::Verbosity mVerbosity; 49 | bool mPrintLayercInfo; 50 | 51 | public: 52 | ParserOnnxConfig() 53 | : mModelDtype(nvinfer1::DataType::kFLOAT) 54 | , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) 55 | , mPrintLayercInfo(false) 56 | { 57 | #ifdef ONNX_DEBUG 58 | if (isDebug()) 59 | { 60 | std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; 61 | } 62 | #endif 63 | } 64 | 65 | protected: 66 | ~ParserOnnxConfig() 67 | { 68 | #ifdef ONNX_DEBUG 69 | if (isDebug()) 70 | { 71 | std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; 72 | } 73 | #endif 74 | } 75 | 76 | public: 77 | virtual void setModelDtype(const nvinfer1::DataType modelDtype) 78 | { 79 | mModelDtype = modelDtype; 80 | } 81 | 82 | virtual nvinfer1::DataType getModelDtype() const 83 | { 84 | return mModelDtype; 85 | } 86 | 87 | virtual const char* getModelFileName() const 88 | { 89 | return mModelFilename.c_str(); 90 | } 91 | virtual void setModelFileName(const char* onnxFilename) 92 | { 93 | mModelFilename = string(onnxFilename); 94 | } 95 | virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const 96 | { 97 | return mVerbosity; 98 | } 99 | virtual void addVerbosity() 100 | { 101 | ++mVerbosity; 102 | } 103 | virtual void reduceVerbosity() 104 | { 105 | --mVerbosity; 106 | } 107 
| virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) 108 | { 109 | mVerbosity = verbosity; 110 | } 111 | 112 | virtual const char* getTextFileName() const 113 | { 114 | return mTextFilename.c_str(); 115 | } 116 | virtual void setTextFileName(const char* textFilename) 117 | { 118 | mTextFilename = string(textFilename); 119 | } 120 | virtual const char* getFullTextFileName() const 121 | { 122 | return mFullTextFilename.c_str(); 123 | } 124 | virtual void setFullTextFileName(const char* fullTextFilename) 125 | { 126 | mFullTextFilename = string(fullTextFilename); 127 | } 128 | virtual bool getPrintLayerInfo() const 129 | { 130 | return mPrintLayercInfo; 131 | } 132 | virtual void setPrintLayerInfo(bool src) 133 | { 134 | mPrintLayercInfo = src; 135 | } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() 136 | 137 | virtual bool isDebug() const 138 | { 139 | #if ONNX_DEBUG 140 | return (std::getenv("ONNX_DEBUG") ? true : false); 141 | #else 142 | return false; 143 | #endif 144 | } 145 | 146 | virtual void destroy() 147 | { 148 | delete this; 149 | } 150 | 151 | }; // class ParserOnnxConfig 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /einsum/einsum_common7/pluginLogger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef PLUGIN_LOGGER_H 18 | #define PLUGIN_LOGGER_H 19 | 20 | #include "pluginLogging.h" 21 | 22 | namespace 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | } // namespace 31 | 32 | #endif // PLUGIN_LOGGER_H 33 | -------------------------------------------------------------------------------- /einsum/einsum_common7/reducedMathPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | namespace nvinfer1 18 | { 19 | namespace plugin 20 | { 21 | namespace detail 22 | { 23 | 24 | // Count leading zeros - start from most significant bit. 
25 | int clz(int x) 26 | { 27 | for (int i = 31; i >= 0; --i) 28 | { 29 | if ((1 << i) & x) 30 | { 31 | return 31 - i; 32 | } 33 | } 34 | return 32; 35 | } 36 | 37 | #define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) 38 | 39 | int find_log_2(int x, bool round_up = false) 40 | { 41 | int a = 31 - clz(x); 42 | if (round_up) 43 | { 44 | a += !CUDNN_IS_POW_2(x); 45 | } 46 | return a; 47 | } 48 | 49 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff) 50 | { 51 | if (denom == 0) 52 | { 53 | return; 54 | } 55 | if (denom == 1) 56 | { 57 | // if dividing by 1, reduced math doesn't work because mul_coeff would 58 | // need to be 2^32, which doesn't fit into unsigned int. the div() 59 | // routine handles this special case separately. 60 | mul_coeff = 0; 61 | shift_coeff = 0; 62 | return; 63 | } 64 | // To express the division N/D in terms of a multiplication, what we first 65 | // imagine is simply N*(1/D). However, 1/D will always evaluate to 0 (for D>1), 66 | // so we need another way. There's nothing that says we have to use exactly 67 | // the fraction 1/D; instead it could be any X/Y that reduces to 1/D (i.e., 68 | // Y=X*D), or at least to "close enough" to it. If we pick Y that is a power 69 | // of two, then the N*(X/Y) can be N*X followed by a right-shift by some amount. 70 | // The power of two we should pick should be at least 2^32, because in the 71 | // div() routine we'll use umulhi(), which returns only the upper 32 bits -- 72 | // this being equivalent to a right-shift by 32. But we might want a higher 73 | // power of two for better accuracy depending on the magnitude of the denominator. 74 | // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, 75 | // and we save shift_coeff as whatever further shift we have to do beyond 76 | // what the umulhi() implies. 
77 | unsigned int p = 31 + find_log_2(denom, true); 78 | unsigned int m = ((1ull << p) + (unsigned int) denom - 1) / (unsigned int) denom; 79 | mul_coeff = m; 80 | shift_coeff = p - 32; 81 | } 82 | 83 | } // namespace detail 84 | 85 | } // namespace plugin 86 | 87 | } // namespace nvinfer1 88 | -------------------------------------------------------------------------------- /einsum/einsum_common7/sampleEngines.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_ENGINES_H 18 | #define TRT_SAMPLE_ENGINES_H 19 | 20 | #include 21 | 22 | #include "NvCaffeParser.h" 23 | #include "NvInfer.h" 24 | #include "NvOnnxParser.h" 25 | #include "NvUffParser.h" 26 | 27 | #include "sampleOptions.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct Parser 34 | { 35 | TrtUniquePtr caffeParser; 36 | TrtUniquePtr uffParser; 37 | TrtUniquePtr onnxParser; 38 | 39 | operator bool() const 40 | { 41 | return caffeParser || uffParser || onnxParser; 42 | } 43 | }; 44 | 45 | //! 46 | //! \brief Generate a network definition for a given model 47 | //! 48 | //! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid 49 | //! 
parser (the returned parser converts to false if tested) 50 | //! 51 | //! \see Parser::operator bool() 52 | //! 53 | Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err); 54 | 55 | //! 56 | //! \brief Create an engine for a network defintion 57 | //! 58 | //! \return Pointer to the engine created or nullptr if the creation failed 59 | //! 60 | nvinfer1::ICudaEngine* networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, 61 | nvinfer1::INetworkDefinition& network, std::ostream& err); 62 | 63 | //! 64 | //! \brief Create an engine for a given model 65 | //! 66 | //! \return Pointer to the engine created or nullptr if the creation failed 67 | //! 68 | nvinfer1::ICudaEngine* modelToEngine( 69 | const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); 70 | 71 | //! 72 | //! \brief Log refittable layers and weights of a refittable engine 73 | //! 74 | void dumpRefittable(nvinfer1::ICudaEngine& engine); 75 | 76 | //! 77 | //! \brief Load a serialized engine 78 | //! 79 | //! \return Pointer to the engine loaded or nullptr if the operation failed 80 | //! 81 | nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err); 82 | 83 | //! 84 | //! \brief Save an engine into a file 85 | //! 86 | //! \return boolean Return true if the engine was successfully saved 87 | //! 88 | bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err); 89 | 90 | //! 91 | //! \brief Create an engine from model or serialized file, and optionally save engine 92 | //! 93 | //! \return Pointer to the engine created or nullptr if the creation failed 94 | //! 
95 | TrtUniquePtr getEngine( 96 | const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); 97 | 98 | } // namespace sample 99 | 100 | #endif // TRT_SAMPLE_ENGINES_H 101 | -------------------------------------------------------------------------------- /einsum/einsum_common7/sampleInference.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_INFERENCE_H 18 | #define TRT_SAMPLE_INFERENCE_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "NvInfer.h" 26 | 27 | #include "sampleReporting.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct InferenceEnvironment 34 | { 35 | TrtUniquePtr engine; 36 | std::unique_ptr profiler; 37 | std::vector> context; 38 | std::vector> bindings; 39 | }; 40 | 41 | //! 42 | //! \brief Set up contexts and bindings for inference 43 | //! 44 | bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); 45 | 46 | //! 47 | //! \brief Run inference and collect timing 48 | //! 
49 | void runInference( 50 | const InferenceOptions& inference, InferenceEnvironment& iEnv, int device, std::vector& trace); 51 | 52 | } // namespace sample 53 | 54 | #endif // TRT_SAMPLE_INFERENCE_H 55 | -------------------------------------------------------------------------------- /einsum/einsum_common7/serialize.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | using std::cerr; 26 | using std::cout; 27 | using std::endl; 28 | 29 | template 30 | inline void serialize_value(void** buffer, T const& value); 31 | 32 | template 33 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value); 34 | 35 | namespace 36 | { 37 | 38 | template 39 | struct Serializer 40 | { 41 | }; 42 | 43 | template 44 | struct Serializer::value || std::is_enum::value || std::is_pod::value>::type> 46 | { 47 | static size_t serialized_size(T const& value) 48 | { 49 | return sizeof(T); 50 | } 51 | static void serialize(void** buffer, T const& value) 52 | { 53 | ::memcpy(*buffer, &value, sizeof(T)); 54 | reinterpret_cast(*buffer) += sizeof(T); 55 | } 56 | static void deserialize(void const** buffer, size_t* buffer_size, T* value) 57 | { 58 | assert(*buffer_size >= sizeof(T)); 59 | ::memcpy(value, *buffer, sizeof(T)); 60 | reinterpret_cast(*buffer) += sizeof(T); 61 | *buffer_size -= sizeof(T); 62 | } 63 | }; 64 | 65 | template <> 66 | struct Serializer 67 | { 68 | static size_t serialized_size(const char* value) 69 | { 70 | return strlen(value) + 1; 71 | } 72 | static void serialize(void** buffer, const char* value) 73 | { 74 | ::strcpy(static_cast(*buffer), value); 75 | reinterpret_cast(*buffer) += strlen(value) + 1; 76 | } 77 | static void deserialize(void const** buffer, size_t* buffer_size, const char** value) 78 | { 79 | *value = static_cast(*buffer); 80 | size_t data_size = strnlen(*value, *buffer_size) + 1; 81 | assert(*buffer_size >= data_size); 82 | reinterpret_cast(*buffer) += data_size; 83 | *buffer_size -= data_size; 84 | } 85 | }; 86 | 87 | template 88 | struct Serializer, 89 | typename std::enable_if::value || std::is_enum::value || std::is_pod::value>::type> 90 | { 91 | static size_t serialized_size(std::vector const& value) 92 | { 93 | return sizeof(value.size()) + value.size() * sizeof(T); 94 | } 95 | 
static void serialize(void** buffer, std::vector const& value) 96 | { 97 | serialize_value(buffer, value.size()); 98 | size_t nbyte = value.size() * sizeof(T); 99 | ::memcpy(*buffer, value.data(), nbyte); 100 | reinterpret_cast(*buffer) += nbyte; 101 | } 102 | static void deserialize(void const** buffer, size_t* buffer_size, std::vector* value) 103 | { 104 | size_t size; 105 | deserialize_value(buffer, buffer_size, &size); 106 | value->resize(size); 107 | size_t nbyte = value->size() * sizeof(T); 108 | assert(*buffer_size >= nbyte); 109 | ::memcpy(value->data(), *buffer, nbyte); 110 | reinterpret_cast(*buffer) += nbyte; 111 | *buffer_size -= nbyte; 112 | } 113 | }; 114 | 115 | } // namespace 116 | 117 | template 118 | inline size_t serialized_size(T const& value) 119 | { 120 | return Serializer::serialized_size(value); 121 | } 122 | 123 | template 124 | inline void serialize_value(void** buffer, T const& value) 125 | { 126 | return Serializer::serialize(buffer, value); 127 | } 128 | 129 | template 130 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) 131 | { 132 | return Serializer::deserialize(buffer, buffer_size, value); 133 | } 134 | -------------------------------------------------------------------------------- /einsum/einsum_common8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kernels) 2 | aux_source_directory(./ DIR_LIB_SRCS) 3 | add_library(einsum_common_lib ${DIR_LIB_SRCS}) -------------------------------------------------------------------------------- /einsum/einsum_common8/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_BBOX_UTILS_H 18 | #define TRT_BBOX_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | template 26 | struct Bbox 27 | { 28 | T xmin, ymin, xmax, ymax; 29 | Bbox(T xmin, T ymin, T xmax, T ymax) 30 | : xmin(xmin) 31 | , ymin(ymin) 32 | , xmax(xmax) 33 | , ymax(ymax) 34 | { 35 | } 36 | Bbox() = default; 37 | }; 38 | 39 | template 40 | struct BboxInfo 41 | { 42 | T conf_score; 43 | int label; 44 | int bbox_idx; 45 | bool kept; 46 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 47 | : conf_score(conf_score) 48 | , label(label) 49 | , bbox_idx(bbox_idx) 50 | , kept(kept) 51 | { 52 | } 53 | BboxInfo() = default; 54 | }; 55 | 56 | template 57 | bool operator<(const Bbox& lhs, const Bbox& rhs) 58 | { 59 | return lhs.x1 < rhs.x1; 60 | } 61 | 62 | template 63 | bool operator==(const Bbox& lhs, const Bbox& rhs) 64 | { 65 | return lhs.x1 == rhs.x1 && lhs.y1 == rhs.y1 && lhs.x2 == rhs.x2 && lhs.y2 == rhs.y2; 66 | } 67 | // }}} 68 | 69 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 70 | 71 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 72 | 73 | size_t dataTypeSize(DataType dtype); 74 | 75 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /einsum/einsum_common8/checkMacrosPlugin.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "checkMacrosPlugin.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvinfer1 23 | { 24 | namespace plugin 25 | { 26 | 27 | // This will be populated by the logger supplied by the user to initLibNvInferPlugins() 28 | ILogger* gLogger{}; 29 | 30 | template 31 | int LogStream::Buf::sync() 32 | { 33 | std::string s = str(); 34 | while (!s.empty() && s.back() == '\n') 35 | { 36 | s.pop_back(); 37 | } 38 | if (gLogger != nullptr) 39 | { 40 | gLogger->log(kSeverity, s.c_str()); 41 | } 42 | str(""); 43 | return 0; 44 | } 45 | 46 | // These use gLogger, and therefore require initLibNvInferPlugins() to be called with a logger 47 | // (otherwise, it will not log) 48 | LogStream gLogError; 49 | LogStream gLogWarning; 50 | LogStream gLogInfo; 51 | LogStream gLogVerbose; 52 | 53 | // break-pointable 54 | void throwCudaError(const char* file, const char* function, int line, int status, const char* msg) 55 | { 56 | CudaError error(file, function, line, status, msg); 57 | error.log(gLogError); 58 | throw error; 59 | } 60 | 61 | // break-pointable 62 | void throwCublasError(const char* file, const char* function, int line, int status, const char* msg) 63 | { 64 | if (msg == nullptr) 65 | { 66 | auto s_ = 
static_cast(status); 67 | switch (s_) 68 | { 69 | case CUBLAS_STATUS_SUCCESS: msg = "CUBLAS_STATUS_SUCCESS"; break; 70 | case CUBLAS_STATUS_NOT_INITIALIZED: msg = "CUBLAS_STATUS_NOT_INITIALIZED"; break; 71 | case CUBLAS_STATUS_ALLOC_FAILED: msg = "CUBLAS_STATUS_ALLOC_FAILED"; break; 72 | case CUBLAS_STATUS_INVALID_VALUE: msg = "CUBLAS_STATUS_INVALID_VALUE"; break; 73 | case CUBLAS_STATUS_ARCH_MISMATCH: msg = "CUBLAS_STATUS_ARCH_MISMATCH"; break; 74 | case CUBLAS_STATUS_MAPPING_ERROR: msg = "CUBLAS_STATUS_MAPPING_ERROR"; break; 75 | case CUBLAS_STATUS_EXECUTION_FAILED: msg = "CUBLAS_STATUS_EXECUTION_FAILED"; break; 76 | case CUBLAS_STATUS_INTERNAL_ERROR: msg = "CUBLAS_STATUS_INTERNAL_ERROR"; break; 77 | case CUBLAS_STATUS_NOT_SUPPORTED: msg = "CUBLAS_STATUS_NOT_SUPPORTED"; break; 78 | case CUBLAS_STATUS_LICENSE_ERROR: msg = "CUBLAS_STATUS_LICENSE_ERROR"; break; 79 | } 80 | } 81 | CublasError error(file, function, line, status, msg); 82 | error.log(gLogError); 83 | throw error; 84 | } 85 | 86 | // break-pointable 87 | void throwCudnnError(const char* file, const char* function, int line, int status, const char* msg) 88 | { 89 | CudnnError error(file, function, line, status, msg); 90 | error.log(gLogError); 91 | throw error; 92 | } 93 | 94 | void logError(const char* msg, const char* file, const char* fn, int line) 95 | { 96 | gLogError << "Parameter check failed at: " << file << "::" << fn << "::" << line; 97 | gLogError << ", condition: " << msg << std::endl; 98 | } 99 | 100 | // break-pointable 101 | void reportAssertion(const char* msg, const char* file, int line) 102 | { 103 | std::ostringstream stream; 104 | stream << "Assertion failed: " << msg << std::endl 105 | << file << ':' << line << std::endl 106 | << "Aborting..." 
<< std::endl; 107 | getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); 108 | cudaDeviceReset(); 109 | abort(); 110 | } 111 | 112 | void TRTException::log(std::ostream& logStream) const 113 | { 114 | logStream << file << " (" << line << ") - " << name << " Error in " << function << ": " << status; 115 | if (message != nullptr) 116 | { 117 | logStream << " (" << message << ")"; 118 | } 119 | logStream << std::endl; 120 | } 121 | 122 | } // namespace plugin 123 | 124 | } // namespace nvinfer1 125 | -------------------------------------------------------------------------------- /einsum/einsum_common8/cub_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | template 19 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments) 20 | { 21 | size_t temp_storage_bytes = 0; 22 | cub::DeviceSegmentedRadixSort::SortPairsDescending((void*) NULL, temp_storage_bytes, (const KeyT*) NULL, 23 | (KeyT*) NULL, (const ValueT*) NULL, (ValueT*) NULL, 24 | num_items, // # items 25 | num_segments, // # segments 26 | (const int*) NULL, (const int*) NULL); 27 | return temp_storage_bytes; 28 | } 29 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | file(GLOB SRCS *.cpp) 17 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS}) 18 | set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE) 19 | file(GLOB CU_SRCS *.cu) 20 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS}) 21 | set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE) 22 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/extractFgScores.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "kernel.h" 17 | 18 | template 19 | pluginStatus_t extractFgScores_gpu(cudaStream_t stream, 20 | int N, 21 | int A, 22 | int H, 23 | int W, 24 | const void* scores, 25 | void* fgScores) 26 | { 27 | // Copy all the objectness scores for one batch 28 | size_t size = A * H * W * sizeof(T); 29 | for (int n = 0; n < N; n++) 30 | { 31 | // Find out the starting pointer of the objectness scores in the input 32 | size_t offset_ld = (n * 2 + 1) * A * H * W; 33 | // Find out the starting pointer of the objectness scores in the output 34 | size_t offset_st = n * A * H * W; 35 | CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); 36 | } 37 | 38 | return STATUS_SUCCESS; 39 | } 40 | 41 | template 42 | pluginStatus_t extractFgScores_cpu(int N, 43 | int A, 44 | int H, 45 | int W, 46 | const void* scores, 47 | void* fgScores) 48 | { 49 | size_t size = A * H * W * sizeof(T); 50 | for (int n = 0; n < N; n++) 51 | { 52 | size_t offset_ld = (n * 2 + 1) * A * H * W; 53 | size_t offset_st = n * A * H * W; 54 | memcpy(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size); 55 | } 56 | return STATUS_SUCCESS; 57 | } 58 | 59 | pluginStatus_t extractFgScores(cudaStream_t stream, 60 | const int N, 61 | const int A, 62 | const int H, 63 | const int W, 64 | const DataType t_scores, 65 | const DLayout_t 
l_scores, 66 | const void* scores, 67 | const DataType t_fgScores, 68 | const DLayout_t l_fgScores, 69 | void* fgScores) 70 | { 71 | if (l_fgScores != NCHW || l_scores != NCHW) 72 | return STATUS_BAD_PARAM; 73 | 74 | if (t_fgScores != DataType::kFLOAT) 75 | return STATUS_BAD_PARAM; 76 | 77 | if (t_scores != DataType::kFLOAT) 78 | return STATUS_BAD_PARAM; 79 | 80 | return extractFgScores_gpu(stream, N, A, H, W, scores, fgScores); 81 | } 82 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/generateAnchors.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "kernel.h" 17 | #include 18 | 19 | pluginStatus_t generateAnchors_cpu(int numRatios, 20 | float* ratios, 21 | int numScales, 22 | float* scales, 23 | int baseSize, 24 | float* anchors) 25 | { 26 | #ifdef DEBUG 27 | DEBUG_PRINTF("Generating Anchors with:\n"); 28 | DEBUG_PRINTF("Scales:"); 29 | for (int s = 0; s < numScales; ++s) 30 | { 31 | DEBUG_PRINTF("%f\t", scales[s]); 32 | } 33 | DEBUG_PRINTF("\n"); 34 | DEBUG_PRINTF("Ratios:"); 35 | for (int r = 0; r < numRatios; ++r) 36 | { 37 | DEBUG_PRINTF("%f\t", ratios[r]); 38 | } 39 | DEBUG_PRINTF("\n"); 40 | #endif 41 | 42 | if ((numScales <= 0) || (numRatios <= 0) || (baseSize <= 0)) 43 | { 44 | return STATUS_BAD_PARAM; 45 | } 46 | 47 | // Generate parameters for numRatios * numScales general anchor boxes 48 | for (int r = 0; r < numRatios; ++r) 49 | { 50 | for (int s = 0; s < numScales; ++s) 51 | { 52 | int id = r * numScales + s; 53 | float scale = scales[s]; 54 | float ratio = ratios[r]; 55 | float bs = baseSize; 56 | float ws = round(sqrt((float) (bs * bs) / ratio)); 57 | float hs = round(ws * ratio); 58 | // Width: bs / sqrt(ratio) * scale 59 | // Height: bs * sqrt(ratio) * scale 60 | ws *= scale; 61 | hs *= scale; 62 | 63 | // x_anchor_ctr 64 | /* 65 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 66 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 67 | */ 68 | anchors[id * 4] = (bs - 1) / 2; 69 | // y_anchor_ctr 70 | /* 71 | * This value should not useful in this implementation of generating numRatios * numScales general anchor boxes. 72 | * Because the center of anchor box in the original input raw image scale will not be dependent on this. 
73 | */ 74 | anchors[id * 4 + 1] = (bs - 1) / 2; 75 | // w_anchor 76 | anchors[id * 4 + 2] = ws; 77 | // h_anchor 78 | anchors[id * 4 + 3] = hs; 79 | } 80 | } 81 | return STATUS_SUCCESS; 82 | } 83 | 84 | pluginStatus_t generateAnchors(cudaStream_t stream, 85 | int numRatios, 86 | float* ratios, 87 | int numScales, 88 | float* scales, 89 | int baseSize, 90 | float* anchors) 91 | { 92 | // Each anchor box has 4 parameters 93 | int ac = numRatios * numScales * 4; 94 | float* anchors_cpu; 95 | cudaMallocHost((void**) &anchors_cpu, sizeof(float) * ac); 96 | pluginStatus_t status = generateAnchors_cpu(numRatios, ratios, numScales, scales, baseSize, anchors_cpu); 97 | cudaMemcpyAsync(anchors, anchors_cpu, sizeof(float) * ac, cudaMemcpyHostToDevice, stream); 98 | cudaFreeHost(anchors_cpu); 99 | return status; 100 | } 101 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "kernel.h" 18 | #include "plugin.h" 19 | 20 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 21 | int topK, DataType DT_BBOX, DataType DT_SCORE) 22 | { 23 | size_t wss[7]; 24 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 25 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 26 | wss[2] = detectionForwardPreNMSSize(N, C2); 27 | wss[3] = detectionForwardPreNMSSize(N, C2); 28 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 29 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 30 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 31 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 32 | return calculateTotalWorkspaceSize(wss, 7); 33 | } 34 | 35 | namespace nvinfer1 36 | { 37 | namespace plugin 38 | { 39 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 40 | int topK, DataType DT_BBOX, DataType DT_SCORE) 41 | { 42 | size_t wss[7]; 43 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 44 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 45 | wss[2] = detectionForwardPreNMSSize(N, C2); 46 | wss[3] = detectionForwardPreNMSSize(N, C2); 47 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 48 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); 49 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 50 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); 51 | return calculateTotalWorkspaceSize(wss, 7); 52 | } 53 | } // namespace plugin 54 | } // namespace nvinfer1 55 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/lReLU.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "kernel.h" 18 | 19 | template 20 | __launch_bounds__(nthdsPerCTA) __global__ 21 | void pReLUKernel(const int n, const float negativeSlope, const float* input, float* output) 22 | { 23 | for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA) 24 | { 25 | output[i] = input[i] > 0 ? input[i] : input[i] * negativeSlope; 26 | } 27 | } 28 | 29 | pluginStatus_t lReLUGPU(cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 30 | { 31 | const int BS = 512; 32 | const int GS = (n + BS - 1) / BS; 33 | pReLUKernel<<>>(n, negativeSlope, 34 | (const float*) input, 35 | (float*) output); 36 | return STATUS_SUCCESS; 37 | } 38 | 39 | pluginStatus_t lReLUInference( 40 | cudaStream_t stream, const int n, const float negativeSlope, const void* input, void* output) 41 | { 42 | return lReLUGPU(stream, n, negativeSlope, (const float*) input, (float*) output); 43 | } 44 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/reducedMathPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef _REDUCED_MATH_PLUGIN_H 18 | #define _REDUCED_MATH_PLUGIN_H 19 | // Dynamically strength-reduced div and mod 20 | // 21 | // Ideas taken from Sean Baxter's MGPU library. 22 | // These classes provide for reduced complexity division and modulus 23 | // on integers, for the case where the same divisor or modulus will 24 | // be used repeatedly. 25 | 26 | namespace nvinfer1 27 | { 28 | namespace plugin 29 | { 30 | namespace detail 31 | { 32 | 33 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff); 34 | 35 | __host__ __device__ __forceinline__ unsigned int umulhi(unsigned int x, unsigned int y) 36 | { 37 | #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 100 38 | return __umulhi(x, y); 39 | #else 40 | unsigned long long z = (unsigned long long) x * (unsigned long long) y; 41 | return (unsigned int) (z >> 32); 42 | #endif 43 | } 44 | 45 | // This is a weird implementation that returns div_up(0,1)=0 but 46 | // div_up(0,2)=1 (wrong) -- just do not use it with a=0. 
47 | __host__ __device__ inline int div_up(int a, int b) 48 | { 49 | return (a - 1) / b + 1; 50 | } 51 | 52 | } // end namespace detail 53 | 54 | class reduced_divisor 55 | { 56 | public: 57 | reduced_divisor() {} 58 | __host__ __forceinline__ reduced_divisor(int _y) 59 | : y(_y) 60 | { 61 | detail::find_divisor(y, mul_coeff, shift_coeff); 62 | } 63 | __host__ __device__ __forceinline__ reduced_divisor(unsigned _mul_coeff, unsigned _shift_coeff, int _y) 64 | : mul_coeff(_mul_coeff) 65 | , shift_coeff(_shift_coeff) 66 | , y(_y) 67 | { 68 | } 69 | __host__ __device__ __forceinline__ int div(int x) const 70 | { 71 | // if dividing by 1, then find_divisor wouldn't have worked because 72 | // mul_coeff would have had to be 2^32, which can't be represented, 73 | // so we have to special case that one. 74 | return (y != 1) ? detail::umulhi((unsigned int) x, mul_coeff) >> shift_coeff : x; 75 | } 76 | __host__ __device__ __forceinline__ int mod(int x) const 77 | { 78 | return x - (div(x) * y); 79 | } 80 | __host__ __device__ __forceinline__ void divmod(int x, int& q, int& mod) const 81 | { 82 | q = div(x); 83 | mod = x - (q * y); 84 | } 85 | __host__ __device__ __forceinline__ int get() const 86 | { 87 | return y; 88 | } 89 | inline __host__ void get_mul_shift(unsigned& mul, unsigned& shift) 90 | { 91 | mul = mul_coeff; 92 | shift = shift_coeff; 93 | } 94 | 95 | protected: 96 | unsigned int mul_coeff; 97 | unsigned int shift_coeff; 98 | int y; 99 | }; 100 | 101 | } // namespace plugin 102 | 103 | } // namespace nvinfer1 104 | #endif /*_REDUCED_MATH_PLUGIN_H*/ 105 | -------------------------------------------------------------------------------- /einsum/einsum_common8/kernels/reorgForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "reducedMathPlugin.h" 17 | #include "kernel.h" 18 | 19 | using namespace nvinfer1::plugin; // for reduced_divisor 20 | 21 | template 22 | __launch_bounds__(nthdsPerCTA) 23 | __global__ void reorgKernel( 24 | const float* input, // input tensor of shape (batch, C, H, W) 25 | const int volume, // note that volumes of input and output tensors are the same 26 | reduced_divisor batch, 27 | reduced_divisor C, 28 | reduced_divisor H, 29 | reduced_divisor W, 30 | reduced_divisor C_out, 31 | reduced_divisor stride, 32 | float* output) // output tensor of shape (batch, C * stride * stride, H / stride, W / stride) 33 | { 34 | /* 35 | * Reference 36 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas_kernels.cu#L370 37 | * https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/blas.c#L9 38 | */ 39 | 40 | // outIndex is row-major position of input coordinates 41 | for (int outIndex = blockIdx.x * nthdsPerCTA + threadIdx.x; outIndex < volume; outIndex += nthdsPerCTA) 42 | { 43 | int i = outIndex; 44 | 45 | // calculate output coordinates from outIndex 46 | int outW, outH, outC; 47 | W.divmod(i, i, outW); 48 | H.divmod(i, i, outH); 49 | C.divmod(i, i, outC); 50 | int outN = i; 51 | 52 | // calculate input coordinates based on output coordinates 53 | // offset is [0, 1, ..., stride * stride - 1] = posH 
* stride + posW 54 | int offset, inC, posH, posW; 55 | C_out.divmod(outC, offset, inC); 56 | stride.divmod(offset, posH, posW); 57 | int inH = outH * stride.get() + posH; 58 | int inW = outW * stride.get() + posW; 59 | int inN = outN; 60 | 61 | // inIndex is row-major position of input coordinates 62 | int inIndex = inW + W.get() * stride.get() * (inH + H.get() * stride.get() * (inC + C_out.get() * inN)); 63 | 64 | output[outIndex] = input[inIndex]; 65 | } 66 | } 67 | 68 | pluginStatus_t reorgGPU( 69 | cudaStream_t stream, 70 | const int batch, 71 | const int C, 72 | const int H, 73 | const int W, 74 | const int stride, 75 | const float* input, 76 | float* output) 77 | { 78 | const int BS = 512; // number of threads in one block 79 | const int volume = batch * C * H * W; // size of input tensor 80 | const int GS = (volume + BS - 1) / BS; // number of blocks to launch, calculated so global number of threads is >= volume 81 | 82 | reduced_divisor C_out(C / (stride * stride)); 83 | reorgKernel<<>>(input, volume, reduced_divisor(batch), reduced_divisor(C), reduced_divisor(H), reduced_divisor(W), C_out, reduced_divisor(stride), output); 84 | return STATUS_SUCCESS; 85 | } 86 | 87 | pluginStatus_t reorgInference( 88 | cudaStream_t stream, 89 | const int batch, 90 | const int C, 91 | const int H, 92 | const int W, 93 | const int stride, 94 | const void* input, 95 | void* output) 96 | { 97 | return reorgGPU(stream, batch, C, H, W, stride, (const float*) input, (float*) output); 98 | } 99 | -------------------------------------------------------------------------------- /einsum/einsum_common8/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "logger.h" 18 | #include "ErrorRecorder.h" 19 | #include "logging.h" 20 | 21 | SampleErrorRecorder gRecorder; 22 | namespace sample 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | 31 | void setReportableSeverity(Logger::Severity severity) 32 | { 33 | gLogger.setReportableSeverity(severity); 34 | gLogVerbose.setReportableSeverity(severity); 35 | gLogInfo.setReportableSeverity(severity); 36 | gLogWarning.setReportableSeverity(severity); 37 | gLogError.setReportableSeverity(severity); 38 | gLogFatal.setReportableSeverity(severity); 39 | } 40 | } // namespace sample 41 | -------------------------------------------------------------------------------- /einsum/einsum_common8/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef LOGGER_H 18 | #define LOGGER_H 19 | 20 | #include "logging.h" 21 | 22 | class SampleErrorRecorder; 23 | extern SampleErrorRecorder gRecorder; 24 | namespace sample 25 | { 26 | extern Logger gLogger; 27 | extern LogStreamConsumer gLogVerbose; 28 | extern LogStreamConsumer gLogInfo; 29 | extern LogStreamConsumer gLogWarning; 30 | extern LogStreamConsumer gLogError; 31 | extern LogStreamConsumer gLogFatal; 32 | 33 | void setReportableSeverity(Logger::Severity severity); 34 | } // namespace sample 35 | 36 | #endif // LOGGER_H 37 | -------------------------------------------------------------------------------- /einsum/einsum_common8/nmsHelper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "plugin.h" 18 | #include "cuda_fp16.h" 19 | #include 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX) 25 | { 26 | if (DT_BBOX == DataType::kFLOAT) 27 | { 28 | return N * C1 * sizeof(float); 29 | } 30 | if (DT_BBOX == DataType::kHALF) 31 | { 32 | return N * C1 * sizeof(__half); 33 | } 34 | 35 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 36 | return (size_t) -1; 37 | } 38 | 39 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX) 40 | { 41 | if (DT_BBOX == DataType::kFLOAT) 42 | { 43 | return shareLocation ? 0 : N * C1 * sizeof(float); 44 | } 45 | if (DT_BBOX == DataType::kHALF) 46 | { 47 | return shareLocation ? 0 : N * C1 * sizeof(__half); 48 | } 49 | 50 | printf("Only FP32/FP16 type bounding boxes are supported.\n"); 51 | return (size_t) -1; 52 | } 53 | 54 | size_t detectionForwardPreNMSSize(int N, int C2) 55 | { 56 | ASSERT(sizeof(float) == sizeof(int)); 57 | return N * C2 * sizeof(float); 58 | } 59 | 60 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK) 61 | { 62 | ASSERT(sizeof(float) == sizeof(int)); 63 | return N * numClasses * topK * sizeof(float); 64 | } 65 | -------------------------------------------------------------------------------- /einsum/einsum_common8/nmsUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_NMS_UTILS_H 18 | #define TRT_NMS_UTILS_H 19 | 20 | #include "plugin.h" 21 | 22 | using namespace nvinfer1; 23 | using namespace nvinfer1::plugin; 24 | 25 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 26 | int topK, DataType DT_BBOX, DataType DT_SCORE); 27 | #endif 28 | -------------------------------------------------------------------------------- /einsum/einsum_common8/parserOnnxConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef PARSER_ONNX_CONFIG_H 18 | #define PARSER_ONNX_CONFIG_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "NvInfer.h" 25 | #include "NvOnnxConfig.h" 26 | #include "NvOnnxParser.h" 27 | 28 | #define ONNX_DEBUG 1 29 | 30 | /** 31 | * \class ParserOnnxConfig 32 | * \brief Configuration Manager Class Concrete Implementation 33 | * 34 | * \note: 35 | * 36 | */ 37 | 38 | using namespace std; 39 | 40 | class ParserOnnxConfig : public nvonnxparser::IOnnxConfig 41 | { 42 | 43 | protected: 44 | string mModelFilename{}; 45 | string mTextFilename{}; 46 | string mFullTextFilename{}; 47 | nvinfer1::DataType mModelDtype; 48 | nvonnxparser::IOnnxConfig::Verbosity mVerbosity; 49 | bool mPrintLayercInfo; 50 | 51 | public: 52 | ParserOnnxConfig() 53 | : mModelDtype(nvinfer1::DataType::kFLOAT) 54 | , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) 55 | , mPrintLayercInfo(false) 56 | { 57 | #ifdef ONNX_DEBUG 58 | if (isDebug()) 59 | { 60 | std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; 61 | } 62 | #endif 63 | } 64 | 65 | protected: 66 | ~ParserOnnxConfig() 67 | { 68 | #ifdef ONNX_DEBUG 69 | if (isDebug()) 70 | { 71 | std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; 72 | } 73 | #endif 74 | } 75 | 76 | public: 77 | virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept 78 | { 79 | mModelDtype = modelDtype; 80 | } 81 | 82 | virtual nvinfer1::DataType getModelDtype() const noexcept 83 | { 84 | return mModelDtype; 85 | } 86 | 87 | virtual const char* getModelFileName() const noexcept 88 | { 89 | return mModelFilename.c_str(); 90 | } 91 | virtual void setModelFileName(const char* onnxFilename) noexcept 92 | { 93 | mModelFilename = string(onnxFilename); 94 | } 95 | virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept 96 | { 97 | return mVerbosity; 98 | } 99 | virtual void addVerbosity() noexcept 100 | { 101 | ++mVerbosity; 102 | } 103 | virtual void 
reduceVerbosity() noexcept 104 | { 105 | --mVerbosity; 106 | } 107 | virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept 108 | { 109 | mVerbosity = verbosity; 110 | } 111 | 112 | virtual const char* getTextFileName() const noexcept 113 | { 114 | return mTextFilename.c_str(); 115 | } 116 | virtual void setTextFileName(const char* textFilename) noexcept 117 | { 118 | mTextFilename = string(textFilename); 119 | } 120 | virtual const char* getFullTextFileName() const noexcept 121 | { 122 | return mFullTextFilename.c_str(); 123 | } 124 | virtual void setFullTextFileName(const char* fullTextFilename) noexcept 125 | { 126 | mFullTextFilename = string(fullTextFilename); 127 | } 128 | virtual bool getPrintLayerInfo() const noexcept 129 | { 130 | return mPrintLayercInfo; 131 | } 132 | virtual void setPrintLayerInfo(bool src) noexcept 133 | { 134 | mPrintLayercInfo = src; 135 | } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() 136 | 137 | virtual bool isDebug() const noexcept 138 | { 139 | #if ONNX_DEBUG 140 | return (std::getenv("ONNX_DEBUG") ? true : false); 141 | #else 142 | return false; 143 | #endif 144 | } 145 | 146 | virtual void destroy() noexcept 147 | { 148 | delete this; 149 | } 150 | 151 | }; // class ParserOnnxConfig 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /einsum/einsum_common8/pluginLogger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef PLUGIN_LOGGER_H 18 | #define PLUGIN_LOGGER_H 19 | 20 | #include "pluginLogging.h" 21 | 22 | namespace 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | } // namespace 31 | 32 | #endif // PLUGIN_LOGGER_H 33 | -------------------------------------------------------------------------------- /einsum/einsum_common8/reducedMathPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | namespace nvinfer1 18 | { 19 | namespace plugin 20 | { 21 | namespace detail 22 | { 23 | 24 | // Count leading zeros - start from most significant bit. 25 | int clz(int x) 26 | { 27 | for (int i = 31; i >= 0; --i) 28 | { 29 | if ((1 << i) & x) 30 | { 31 | return 31 - i; 32 | } 33 | } 34 | return 32; 35 | } 36 | 37 | #define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) 38 | 39 | int find_log_2(int x, bool round_up = false) 40 | { 41 | int a = 31 - clz(x); 42 | if (round_up) 43 | { 44 | a += !CUDNN_IS_POW_2(x); 45 | } 46 | return a; 47 | } 48 | 49 | void find_divisor(int denom, unsigned int& mul_coeff, unsigned int& shift_coeff) 50 | { 51 | if (denom == 0) 52 | { 53 | return; 54 | } 55 | if (denom == 1) 56 | { 57 | // if dividing by 1, reduced math doesn't work because mul_coeff would 58 | // need to be 2^32, which doesn't fit into unsigned int. the div() 59 | // routine handles this special case separately. 60 | mul_coeff = 0; 61 | shift_coeff = 0; 62 | return; 63 | } 64 | // To express the division N/D in terms of a multiplication, what we first 65 | // imagine is simply N*(1/D). However, 1/D will always evaluate to 0 (for D>1), 66 | // so we need another way. There's nothing that says we have to use exactly 67 | // the fraction 1/D; instead it could be any X/Y that reduces to 1/D (i.e., 68 | // Y=X*D), or at least to "close enough" to it. If we pick Y that is a power 69 | // of two, then the N*(X/Y) can be N*X followed by a right-shift by some amount. 70 | // The power of two we should pick should be at least 2^32, because in the 71 | // div() routine we'll use umulhi(), which returns only the upper 32 bits -- 72 | // this being equivalent to a right-shift by 32. But we might want a higher 73 | // power of two for better accuracy depending on the magnitude of the denominator. 
74 | // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, 75 | // and we save shift_coeff as whatever further shift we have to do beyond 76 | // what the umulhi() implies. 77 | unsigned int p = 31 + find_log_2(denom, true); 78 | unsigned int m = ((1ull << p) + (unsigned int) denom - 1) / (unsigned int) denom; 79 | mul_coeff = m; 80 | shift_coeff = p - 32; 81 | } 82 | 83 | } // namespace detail 84 | 85 | } // namespace plugin 86 | 87 | } // namespace nvinfer1 88 | -------------------------------------------------------------------------------- /einsum/einsum_common8/sampleInference.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TRT_SAMPLE_INFERENCE_H 18 | #define TRT_SAMPLE_INFERENCE_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "NvInfer.h" 26 | 27 | #include "sampleReporting.h" 28 | #include "sampleUtils.h" 29 | 30 | namespace sample 31 | { 32 | 33 | struct InferenceEnvironment 34 | { 35 | TrtUniquePtr engine; 36 | std::unique_ptr profiler; 37 | std::vector> context; 38 | std::vector> bindings; 39 | bool error{false}; 40 | }; 41 | 42 | //! 43 | //! \brief Set up contexts and bindings for inference 44 | //! 
45 | bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); 46 | 47 | //! 48 | //! \brief Deserialize the engine and time how long it takes. 49 | //! 50 | bool timeDeserialize(InferenceEnvironment& iEnv); 51 | 52 | //! 53 | //! \brief Run inference and collect timing, return false if any error hit during inference 54 | //! 55 | bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv, int device, std::vector& trace); 56 | 57 | } // namespace sample 58 | 59 | #endif // TRT_SAMPLE_INFERENCE_H 60 | -------------------------------------------------------------------------------- /einsum/einsum_common8/serialize.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | using std::cerr; 26 | using std::cout; 27 | using std::endl; 28 | 29 | template 30 | inline void serialize_value(void** buffer, T const& value); 31 | 32 | template 33 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value); 34 | 35 | namespace 36 | { 37 | 38 | template 39 | struct Serializer 40 | { 41 | }; 42 | 43 | template 44 | struct Serializer::value || std::is_enum::value || std::is_pod::value>::type> 46 | { 47 | static size_t serialized_size(T const& value) 48 | { 49 | return sizeof(T); 50 | } 51 | static void serialize(void** buffer, T const& value) 52 | { 53 | ::memcpy(*buffer, &value, sizeof(T)); 54 | reinterpret_cast(*buffer) += sizeof(T); 55 | } 56 | static void deserialize(void const** buffer, size_t* buffer_size, T* value) 57 | { 58 | assert(*buffer_size >= sizeof(T)); 59 | ::memcpy(value, *buffer, sizeof(T)); 60 | reinterpret_cast(*buffer) += sizeof(T); 61 | *buffer_size -= sizeof(T); 62 | } 63 | }; 64 | 65 | template <> 66 | struct Serializer 67 | { 68 | static size_t serialized_size(const char* value) 69 | { 70 | return strlen(value) + 1; 71 | } 72 | static void serialize(void** buffer, const char* value) 73 | { 74 | ::strcpy(static_cast(*buffer), value); 75 | reinterpret_cast(*buffer) += strlen(value) + 1; 76 | } 77 | static void deserialize(void const** buffer, size_t* buffer_size, const char** value) 78 | { 79 | *value = static_cast(*buffer); 80 | size_t data_size = strnlen(*value, *buffer_size) + 1; 81 | assert(*buffer_size >= data_size); 82 | reinterpret_cast(*buffer) += data_size; 83 | *buffer_size -= data_size; 84 | } 85 | }; 86 | 87 | template 88 | struct Serializer, 89 | typename std::enable_if::value || std::is_enum::value || std::is_pod::value>::type> 90 | { 91 | static size_t serialized_size(std::vector const& value) 92 | { 93 | return sizeof(value.size()) + value.size() * sizeof(T); 94 | } 95 | 
static void serialize(void** buffer, std::vector const& value) 96 | { 97 | serialize_value(buffer, value.size()); 98 | size_t nbyte = value.size() * sizeof(T); 99 | ::memcpy(*buffer, value.data(), nbyte); 100 | reinterpret_cast(*buffer) += nbyte; 101 | } 102 | static void deserialize(void const** buffer, size_t* buffer_size, std::vector* value) 103 | { 104 | size_t size; 105 | deserialize_value(buffer, buffer_size, &size); 106 | value->resize(size); 107 | size_t nbyte = value->size() * sizeof(T); 108 | assert(*buffer_size >= nbyte); 109 | ::memcpy(value->data(), *buffer, nbyte); 110 | reinterpret_cast(*buffer) += nbyte; 111 | *buffer_size -= nbyte; 112 | } 113 | }; 114 | 115 | } // namespace 116 | 117 | template 118 | inline size_t serialized_size(T const& value) 119 | { 120 | return Serializer::serialized_size(value); 121 | } 122 | 123 | template 124 | inline void serialize_value(void** buffer, T const& value) 125 | { 126 | return Serializer::serialize(buffer, value); 127 | } 128 | 129 | template 130 | inline void deserialize_value(void const** buffer, size_t* buffer_size, T* value) 131 | { 132 | return Serializer::deserialize(buffer, buffer_size, value); 133 | } 134 | -------------------------------------------------------------------------------- /function/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | AUX_SOURCE_DIRECTORY(./ DIR_LIB_SRCS) 2 | #SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) 3 | ADD_LIBRARY(functionlib ${DIR_LIB_SRCS}) 4 | -------------------------------------------------------------------------------- /function/TrtInfer.cpp: -------------------------------------------------------------------------------- 1 | #include "TrtInfer.h" 2 | void allocate_buffers(std::unique_ptr &engine,int max_batch_size, 3 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers) 4 | { 5 | // 3.2 分配输入、输出内存(cpu+gpu) 6 | int NbBindings = engine->getNbBindings(); // number of 
input+output 7 | // void* buffers[NbBindings]; // initialize buffers(for gpu data) 8 | 9 | for (int i = 0; i < NbBindings; i++) 10 | { 11 | auto dims = engine->getBindingDimensions(i); 12 | size_t vol = static_cast(max_batch_size); 13 | DataType type = engine->getBindingDataType(i); 14 | vol *= samplesCommon::volume(dims); 15 | size_t size_binding = vol * samplesCommon::getElementSize(type); 16 | 17 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 18 | std::vector temp_data(vol); 19 | bool is_input = engine->bindingIsInput(i); 20 | if(is_input){ // 分配 21 | inputIndex.push_back(i); 22 | input_cpu_data.push_back(temp_data); // 创建cpu输入 23 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 24 | } 25 | else { 26 | output_cpu_data.push_back(temp_data); 27 | output_cpu_data.size.push_back(size_binding); 28 | } 29 | } 30 | return; 31 | } 32 | void trt_infer(cudaStream_t &stream,std::unique_ptr &context,int max_batch_size, 33 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers) 34 | { 35 | auto start_time = std::chrono::system_clock::now(); 36 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 40 | context->execute(max_batch_size,buffers); 41 | for(int i=0; isize();i++){ 42 | cudaMemcpyAsync(output_cpu_data[i].data(), buffers[i+inputIndex.size()], output_cpu_data.size[i], cudaMemcpyDeviceToHost, stream); 43 | } 44 | cudaStreamSynchronize(stream); 45 | auto end_time = std::chrono::system_clock::now(); 46 | std::cout << "infer time: " << std::chrono::duration_cast(end_time - start_time).count() << "ms" << std::endl; 47 | } 48 | -------------------------------------------------------------------------------- /function/function.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | using namespace std; 3 | using namespace nvinfer1; 4 | Logger gLogger; 5 | std::vector> reshape_1to2D(std::vector shape,std::vector data){ 6 | std::vector> 
output(shape[0]); 7 | for(int i=0; i> arrays,int max_lengths){ 18 | for(int i = 0; i < arrays.size() && i engineData(fsize); 39 | engineFile.read(engineData.data(), fsize); 40 | if (!engineFile) 41 | { 42 | std::cout << "Error loading engine file: " << engine << std::endl; 43 | return nullptr; 44 | } 45 | 46 | std::unique_ptr runtime(createInferRuntime(gLogger)); 47 | 48 | return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr); 49 | } 50 | -------------------------------------------------------------------------------- /function/image.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "image.hpp" 3 | 4 | static const float kMean[3] = { 0.485f, 0.456f, 0.406f }; 5 | static const float kStdDev[3] = { 0.229f, 0.224f, 0.225f }; 6 | static const int map_[7][3] = { {0,0,0} , 7 | {128,0,0}, 8 | {0,128,0}, 9 | {0,0,128}, 10 | {128,128,0}, 11 | {128,0,128}, 12 | {0,128,0}}; 13 | 14 | 15 | float* normal(cv::Mat img) { 16 | //将cv::Mat格式的图片,转换成一维float向量 17 | float * data = (float*)calloc(img.rows*img.cols * img.channels(), sizeof(float)); 18 | // printf("image channel %d\n",img.channels()); 19 | if(img.channels()==3){ 20 | for (int c = 0; c < 3; ++c) 21 | { 22 | for (int i = 0; i < img.rows; ++i) 23 | { //获取第i行首像素指针 24 | cv::Vec3b *p1 = img.ptr(i); 25 | for (int j = 0; j < img.cols; ++j) 26 | { 27 | data[c * img.cols * img.rows + i * img.cols + j] = (p1[j][c] / 255.0f - kMean[c]) / kStdDev[c]; 28 | } 29 | } 30 | } 31 | } 32 | else if(img.channels()==1){ 33 | for (int c = 0; c < 1; ++c) 34 | { 35 | for (int i = 0; i < img.rows; ++i) 36 | { //获取第i行首像素指针 37 | cv::Vec *p1 = img.ptr>(i); 38 | for (int j = 0; j < img.cols; ++j) 39 | { 40 | data[c * img.cols * img.rows + i * img.cols + j] = p1[j][c]; 41 | } 42 | } 43 | } 44 | } 45 | else{ 46 | printf("!!!!!!!!!!!!!!!!!!图片输入错误\n"); 47 | } 48 | return data; 49 | } 50 | -------------------------------------------------------------------------------- 
/gcn.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/gcn.onnx -------------------------------------------------------------------------------- /generate_onnx.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import onnx 6 | 7 | class model(nn.Module): 8 | def __init__(self, in_channel=1): 9 | super().__init__() 10 | self.A = np.ones(shape=(3, 15, 15)) 11 | self.A = torch.tensor(self.A, dtype=torch.float32, requires_grad=False) 12 | 13 | def forward(self, x: torch.Tensor): 14 | x = x.permute(0, 2, 3, 1, 4).contiguous() 15 | x = torch.einsum('nctkv,kvw->nctw', x, self.A) 16 | return x 17 | 18 | input_tensor = torch.ones(size=[1, 3, 1, 1, 15]) 19 | Model = model() 20 | Model.cuda() 21 | out = Model(input_tensor) 22 | 23 | input_name = ['input1'] # , 'input2'] 24 | output_name = ['output1'] # , 'output2'] # 必须要有输入输出 25 | torch.onnx.export(Model, 26 | input_tensor, 27 | './gcn.onnx', 28 | input_names=input_name, output_names=output_name, 29 | verbose=True, 30 | opset_version=12 31 | ) 32 | model = onnx.load('./gcn.onnx') 33 | print(onnx.checker.check_model(model)) 34 | print(out) 35 | print(out.size()) 36 | print(out.sum()) 37 | 38 | -------------------------------------------------------------------------------- /include/Head.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "function.h" 3 | #include "TrtInfer.h" 4 | -------------------------------------------------------------------------------- /include/TrtInfer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "function.h" 3 | void allocate_buffers(std::unique_ptr &engine,int max_batch_size, 4 | std::vector 
&inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers); 5 | void trt_infer(cudaStream_t &stream,std::unique_ptr &context,int max_batch_size, 6 | std::vector &inputIndex,CPU_data &input_cpu_data, CPU_data &output_cpu_data,void** buffers); 7 | -------------------------------------------------------------------------------- /include/function.h: -------------------------------------------------------------------------------- 1 | //#ifndef FUNCTION_H 2 | //#define FUNCTION_H 3 | #pragma once 4 | #include 5 | #include "NvInferRuntime.h" 6 | #include "NvInferRuntimeCommon.h" 7 | #include "NvOnnxConfig.h" 8 | #include "NvOnnxParser.h" 9 | #include "NvUtils.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "buffers.h" 20 | #include "image.hpp" 21 | 22 | class CPU_data :public std::vector> 23 | { 24 | public: 25 | CPU_data() {} 26 | std::vector size; // 记录每个数据的内存占用字节数 *sizeof(float) 27 | }; 28 | 29 | using namespace nvinfer1; 30 | class Logger : public ILogger 31 | { 32 | void log(Severity severity, const char* msg) throw() override 33 | { 34 | // suppress info-level messages 35 | if (severity != Severity::kINFO) 36 | std::cout << msg << std::endl; 37 | } 38 | }; 39 | 40 | extern Logger gLogger; 41 | struct InferDeleter 42 | { 43 | template 44 | void operator()(T* obj) const{ 45 | if (obj) 46 | { 47 | obj->destroy(); 48 | } 49 | } 50 | }; 51 | 52 | struct CudaDeleter 53 | { 54 | void operator()(void* obj){ 55 | if (obj) 56 | { 57 | cudaFree(obj); 58 | } 59 | } 60 | }; 61 | //constexpr long double operator"" _GiB(long double val) 62 | //{ 63 | // return val * (1 << 30); 64 | //} 65 | //constexpr long double operator"" _MiB(long double val) 66 | //{ 67 | // return val * (1 << 20); 68 | //} 69 | //constexpr long double operator"" _KiB(long double val) 70 | //{ 71 | // return val * (1 << 10); 72 | //} 73 | std::vector> reshape_1to2D(std::vector shape,std::vector data); 74 
| void printfVector2D(std::vector> arrays,int max_lengths); 75 | 76 | ICudaEngine* loadEngine(const std::string& engine); 77 | 78 | //#endif // FUNCTION_H 79 | -------------------------------------------------------------------------------- /include/image.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | typedef struct { 3 | int w; 4 | int h; 5 | int c; 6 | float *data; 7 | } image; 8 | float* normal(cv::Mat img); 9 | -------------------------------------------------------------------------------- /infer_test.cpp: -------------------------------------------------------------------------------- 1 | #include "Head.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 7 | std::string engine_path = "../data/resnet50.engine"; 8 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 9 | 10 | // 0. 参数 11 | int max_batch_size = 1; 12 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 13 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 14 | //std::vector> OUTPUT_SIZE{{1,1000}}; 15 | 16 | int main() 17 | { 18 | std::unique_ptr engine(loadEngine(engine_path)); 19 | 20 | // 5. 生成context 21 | std::unique_ptr context(engine->createExecutionContext()); 22 | // 3. 
推理 23 | // 3.1 构建stream 24 | cudaStream_t stream; 25 | cudaStreamCreate(&stream); 26 | 27 | // 3.2 分配输入、输出内存(cpu+gpu) 28 | std::vector inputIndex; // 输入索引 29 | CPU_data input_cpu_data, output_cpu_data; 30 | int NbBindings = engine->getNbBindings(); // number of input+output 31 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 32 | 33 | for (int i = 0; i < NbBindings; i++) 34 | { 35 | auto dims = engine->getBindingDimensions(i); 36 | size_t vol = static_cast(max_batch_size); 37 | DataType type = engine->getBindingDataType(i); 38 | vol *= samplesCommon::volume(dims); 39 | size_t size_binding = vol * samplesCommon::getElementSize(type); 40 | 41 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 42 | vector temp_data(vol); 43 | bool is_input = engine->bindingIsInput(i); 44 | if(is_input){ // 分配 45 | inputIndex.push_back(i); 46 | input_cpu_data.push_back(temp_data); // 创建cpu输入 47 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 48 | } 49 | else { 50 | output_cpu_data.push_back(temp_data); 51 | output_cpu_data.size.push_back(size_binding); 52 | } 53 | } 54 | // 3.3 加载输入 55 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 56 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 57 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 58 | cv::resize(img,dst, dst.size()); 59 | float* fileData=normal(dst); 60 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 61 | input_cpu_data[0][i] = fileData[i]; 62 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? 
"" : "\n"); 63 | } 64 | free(fileData); // 释放图片 65 | for(int i = 0; i <2;i++){ // 第一次启动GPU,会消耗很长时间,第二次就正常速度了 66 | auto start_time = std::chrono::system_clock::now(); 67 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 71 | context->execute(max_batch_size,buffers); 72 | for(int i=0; i(end_time - start_time).count() << "ms" << std::endl; 78 | } 79 | cudaStreamDestroy(stream); 80 | 81 | for(int i = 0; i < OUTPUT_SIZE.size(); ++i){ 82 | std::vector> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 83 | printfVector2D(a,10); 84 | } 85 | 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /mytest.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xn1997/TensorRT-EinsumPlugin/b528d1f0d383bd7e08767de496587a57af6ab4d1/mytest.cpp -------------------------------------------------------------------------------- /resnet50.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | const char* onnxModelFile = "../data/resnet50.onnx"; 7 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 8 | 9 | int main() 10 | { 11 | std::unique_ptr builder(createInferBuilder(gLogger)); 12 | builder->setMaxBatchSize(1); 13 | 14 | std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 15 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 16 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 17 | 18 | std::unique_ptr config(builder->createBuilderConfig()); 19 | config->setMaxWorkspaceSize(1_GiB); 20 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 21 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 22 | 23 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 24 | std::unique_ptr 
context(engine->createExecutionContext()); 25 | 26 | // 0. 参数 27 | int max_batch_size = 1; 28 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 29 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 30 | 31 | // 3. 推理 32 | // 3.1 构建stream 33 | cudaStream_t stream; 34 | cudaStreamCreate(&stream); 35 | 36 | // 3.2 分配输入、输出内存(cpu+gpu) 37 | std::vector inputIndex; // 输入索引 38 | CPU_data input_cpu_data, output_cpu_data; 39 | int NbBindings = engine->getNbBindings(); // number of input+output 40 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 41 | 42 | for (int i = 0; i < NbBindings; i++) 43 | { 44 | auto dims = engine->getBindingDimensions(i); 45 | size_t vol = static_cast(max_batch_size); 46 | DataType type = engine->getBindingDataType(i); 47 | vol *= samplesCommon::volume(dims); 48 | size_t size_binding = vol * samplesCommon::getElementSize(type); 49 | 50 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 51 | vector temp_data(vol); 52 | bool is_input = engine->bindingIsInput(i); 53 | if(is_input){ // 分配 54 | inputIndex.push_back(i); 55 | input_cpu_data.push_back(temp_data); // 创建cpu输入 56 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 57 | } 58 | else { 59 | output_cpu_data.push_back(temp_data); 60 | output_cpu_data.size.push_back(size_binding); 61 | } 62 | } 63 | // 3.3 加载输入 64 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 65 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 66 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 67 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 68 | cv::resize(img,dst, dst.size()); 69 | float* fileData=normal(dst); 70 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 71 | input_cpu_data[0][i] = fileData[i]; 72 | } 73 | free(fileData); // 释放图片 74 | // 3.2 输入从cpu拷贝至gpu 75 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 80 | if(is_success) 81 | std::cout << "Forward success !" << std::endl; 82 | else 83 | std::cout << "Forward Error !" 
<< std::endl; 84 | // 3.4 输出从gpu拷贝至cpu 85 | for(int i=0; i> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 95 | printfVector2D(a,10); 96 | } 97 | 98 | return 1; 99 | } 100 | -------------------------------------------------------------------------------- /sample.cpp: -------------------------------------------------------------------------------- 1 | #include "Head.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 7 | std::string engine_path = "../data/resnet50.engine"; 8 | // 0. 参数 9 | int max_batch_size = 1; 10 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 11 | std::vector> OUTPUT_SIZE{{7,7},{1,1000}}; // for reshape output 12 | //std::vector> OUTPUT_SIZE{{1,1000}}; 13 | 14 | int main() 15 | { 16 | std::unique_ptr engine(loadEngine(engine_path)); 17 | std::unique_ptr context(engine->createExecutionContext()); 18 | 19 | // 3.1 构建stream 20 | cudaStream_t stream; 21 | cudaStreamCreate(&stream); 22 | 23 | // 3.2 分配输入、输出内存(cpu+gpu) 24 | std::vector inputIndex; // 输入索引 25 | CPU_data input_cpu_data, output_cpu_data; 26 | int NbBindings = engine->getNbBindings(); // number of input+output 27 | void* buffers[NbBindings]; 28 | allocate_buffers(engine,max_batch_size,inputIndex,input_cpu_data,output_cpu_data,buffers); // initialize buffers(for gpu data) 29 | 30 | // 3.3 加载输入 31 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 32 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 33 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 34 | cv::resize(img,dst, dst.size()); 35 | float* fileData=normal(dst); 36 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 37 | input_cpu_data[0][i] = fileData[i]; 38 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? 
"" : "\n"); 39 | } 40 | free(fileData); // 释放图片 41 | // infer 42 | for(int i = 0; i <2;i++){ // 第一次启动GPU,会消耗很长时间,第二次就正常速度了 43 | trt_infer(stream,context,max_batch_size,inputIndex,input_cpu_data,output_cpu_data,buffers); 44 | } 45 | 46 | cudaStreamDestroy(stream); 47 | //output postprogress 48 | for(int i = 0; i < OUTPUT_SIZE.size(); ++i){ 49 | std::vector> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 50 | printfVector2D(a,10); 51 | } 52 | 53 | return 1; 54 | } 55 | -------------------------------------------------------------------------------- /testNet.cpp: -------------------------------------------------------------------------------- 1 | #include "function.h" 2 | 3 | using namespace std; 4 | using namespace nvinfer1; 5 | 6 | const char* onnxModelFile = "../data/test.onnx"; 7 | const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 8 | 9 | int main() 10 | { 11 | std::unique_ptr builder(createInferBuilder(gLogger)); 12 | builder->setMaxBatchSize(1); 13 | 14 | std::unique_ptr network(builder->createNetworkV2(explicitBatch)); 15 | std::unique_ptr parser(nvonnxparser::createParser(*network, gLogger)); 16 | parser->parseFromFile(onnxModelFile, static_cast(ILogger::Severity::kWARNING)); 17 | 18 | std::unique_ptr config(builder->createBuilderConfig()); 19 | config->setMaxWorkspaceSize(1_GiB); 20 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 21 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 22 | 23 | std::unique_ptr engine(builder->buildEngineWithConfig(*network, *config)); 24 | std::unique_ptr context(engine->createExecutionContext()); 25 | 26 | // 0. 参数 27 | int max_batch_size = 1; 28 | int INPUT_H=224,INPUT_W=224,INPUT_C=3; 29 | std::vector> OUTPUT_SIZE{{4,4},{1,5}}; // for reshape output 30 | 31 | // 3. 
推理 32 | // 3.1 构建stream 33 | cudaStream_t stream; 34 | cudaStreamCreate(&stream); 35 | 36 | // 3.2 分配输入、输出内存(cpu+gpu) 37 | std::vector inputIndex; // 输入索引 38 | CPU_data input_cpu_data, output_cpu_data; 39 | int NbBindings = engine->getNbBindings(); // number of input+output 40 | void* buffers[NbBindings]; // initialize buffers(for gpu data) 41 | 42 | for (int i = 0; i < NbBindings; i++) 43 | { 44 | auto dims = engine->getBindingDimensions(i); 45 | size_t vol = static_cast(max_batch_size); 46 | DataType type = engine->getBindingDataType(i); 47 | vol *= samplesCommon::volume(dims); 48 | size_t size_binding = vol * samplesCommon::getElementSize(type); 49 | 50 | cudaMalloc(&buffers[i], size_binding); // allocate gpu memery 51 | vector temp_data(vol); 52 | bool is_input = engine->bindingIsInput(i); 53 | if(is_input){ // 分配 54 | inputIndex.push_back(i); 55 | input_cpu_data.push_back(temp_data); // 创建cpu输入 56 | input_cpu_data.size.push_back(size_binding); // 记录输入占用字节数 57 | } 58 | else { 59 | output_cpu_data.push_back(temp_data); 60 | output_cpu_data.size.push_back(size_binding); 61 | } 62 | } 63 | // 3.3 加载输入 64 | std::string image_path = "../data/tabby_tiger_cat.jpg"; 65 | cv::Mat img = cv::imread(image_path, cv::IMREAD_COLOR); 66 | cv::cvtColor(img,img,cv::COLOR_BGR2RGB); 67 | cv::Mat dst = cv::Mat::zeros(INPUT_H, INPUT_W, CV_32FC3); 68 | cv::resize(img,dst, dst.size()); 69 | float* fileData=normal(dst); 70 | for(int i = 0; i < INPUT_H*INPUT_W*INPUT_C; ++i){ 71 | input_cpu_data[0][i] = fileData[i]; 72 | // std::cout << (" .:-=+*#%@"[static_cast(fileData[i]) / 26]) << (((i + 1) % INPUT_W) ? "" : "\n"); 73 | } 74 | free(fileData); // 释放图片 75 | // 3.2 输入从cpu拷贝至gpu 76 | for(int i=0; ienqueue(max_batch_size,buffers,stream,nullptr); 81 | if(is_success) 82 | std::cout << "Forward success !" << std::endl; 83 | else 84 | std::cout << "Forward Error !" 
<< std::endl; 85 | // 3.4 输出从gpu拷贝至cpu 86 | for(int i=0; i> a = reshape_1to2D(OUTPUT_SIZE[i],output_cpu_data[i]); 95 | printfVector2D(a,10); 96 | } 97 | 98 | return 1; 99 | } 100 | --------------------------------------------------------------------------------