├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── codeql.yml │ ├── main.yml │ ├── ok-to-test-command.yml │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── TritonOnnxRuntimeBackendConfig.cmake.in └── download_onnxruntime.cmake ├── pyproject.toml ├── src ├── libtriton_onnxruntime.ldscript ├── onnxruntime.cc ├── onnxruntime_loader.cc ├── onnxruntime_loader.h ├── onnxruntime_utils.cc └── onnxruntime_utils.h ├── test └── initializer_as_input │ ├── README.md │ ├── generate_test_model.py │ ├── models │ └── add_with_initializer │ │ ├── 1 │ │ └── model.onnx │ │ └── config.pbtxt │ ├── test.py │ └── test.sh └── tools └── gen_ort_dockerfile.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | 4 | IndentWidth: 2 5 | ColumnLimit: 80 6 | ContinuationIndentWidth: 4 7 | UseTab: Never 8 | MaxEmptyLinesToKeep: 2 9 | 10 | SortIncludes: true 11 | CompactNamespaces: true 12 | ReflowComments: true 13 | 14 | DerivePointerAlignment: false 15 | PointerAlignment: Left 16 | 17 | AllowShortIfStatementsOnASingleLine: false 18 | AllowShortBlocksOnASingleLine: false 19 | AllowShortFunctionsOnASingleLine: Inline 20 | 21 | AlwaysBreakAfterReturnType: TopLevelDefinitions 22 | AlignAfterOpenBracket: AlwaysBreak 23 | BreakBeforeBraces: Custom 24 | BraceWrapping: 25 | AfterClass: false 26 | AfterControlStatement: false 27 | AfterEnum: false 28 | AfterFunction: true 29 | AfterNamespace: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | BeforeCatch: true 33 | 34 | BinPackArguments: true 35 | BinPackParameters: true 36 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 37 | 38 | IndentCaseLabels: true 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Description** 11 | A clear and concise description of what the bug is. 12 | 13 | **Triton Information** 14 | What version of Triton are you using? 15 | 16 | Are you using the Triton container or did you build it yourself? 17 | 18 | **To Reproduce** 19 | 20 | If the problem appears to be a bug in the execution of the model itself, first attempt to run the model directly in ONNX Runtime. What is the output from loading and running the model in ORT directly? If there is a problem running the model directly with ORT, please submit an issue in the microsoft/onnxruntime (github.com) project. 21 | 22 | If the problem appears to be in Triton itself, provide detailed steps to reproduce the behavior in Triton. 23 | 24 | Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well). 25 | 26 | **Expected behavior** 27 | A clear and concise description of what you expected to happen. 
28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "CodeQL" 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | analyze: 34 | name: Analyze 35 | runs-on: ubuntu-latest 36 | permissions: 37 | actions: read 38 | contents: read 39 | security-events: write 40 | 41 | strategy: 42 | fail-fast: false 43 | matrix: 44 | language: [ 'python' ] 45 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 46 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 47 | 48 | steps: 49 | - name: Checkout repository 50 | uses: actions/checkout@v3 51 | 52 | # Initializes the CodeQL tools for scanning. 
53 | - name: Initialize CodeQL 54 | uses: github/codeql-action/init@v2 55 | with: 56 | languages: ${{ matrix.language }} 57 | # If you wish to specify custom queries, you can do so here or in a config file. 58 | # By default, queries listed here will override any specified in a config file. 59 | # Prefix the list here with "+" to use these queries and those in the config file. 60 | 61 | # For details on CodeQL's query packs, refer to: 62 | # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 63 | queries: +security-and-quality 64 | 65 | 66 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 67 | # If this step fails, then you should remove it and run the build manually (see below). 68 | - name: Autobuild 69 | uses: github/codeql-action/autobuild@v2 70 | 71 | # Command-line programs to run using the OS shell. 72 | # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 73 | 74 | # If the Autobuild fails above, remove it, uncomment the following three lines, 75 | # and modify them (or add more) to build your code; refer to the example below for guidance. 76 | 77 | # - run: | 78 | # echo "Run, Build Application using script" 79 | # ./location_of_script_within_repo/buildscript.sh 80 | 81 | - name: Perform CodeQL Analysis 82 | uses: github/codeql-action/analyze@v2 83 | with: 84 | category: "/language:${{matrix.language}}" 85 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | issue_comment: 4 | types: [created] 5 | jobs: 6 | slashCommandDispatch: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Command Dispatch 10 | uses: peter-evans/slash-command-dispatch@v2 11 | with: 12 | token: ${{ secrets.PAT }} 13 | permission: maintain 14 | issue-type: pull-request 15 | commands: | 16 | ok-to-test 17 | -------------------------------------------------------------------------------- /.github/workflows/ok-to-test-command.yml: -------------------------------------------------------------------------------- 1 | name: ok-to-test-command 2 | on: 3 | repository_dispatch: 4 | types: [ok-to-test-command] 5 | 6 | jobs: 7 | # Repo owner has commented /ok-to-test on a pull request. For both 8 | # fork-based and trusted requests, a SHA must be supplied and is checked against 9 | # the request SHA.
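# Example trigger (illustrative): a maintainer comments on the pull request
#   /ok-to-test sha=<head-sha>
# Optional named arguments handled by the steps below include serverbranch=,
# buildargs=, skipbuild=, skipbuildqa=, tests= and skipsanity=.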
10 | buildAndTest: 11 | runs-on: [self-hosted, triton] 12 | if: 13 | (github.event.client_payload.slash_command.args.named.sha != '') && 14 | contains(github.event.client_payload.pull_request.head.sha, 15 | github.event.client_payload.slash_command.args.named.sha) 16 | steps: 17 | - name: Build Server Container 18 | if: github.event.client_payload.slash_command.args.named.skipbuild == '' 19 | run: (cd /home/runner && 20 | bash -x ./build.sh --serverbranch "${{ github.event.client_payload.slash_command.args.named.serverbranch }}" --buildargs "--enable-logging --enable-stats --enable-tracing --enable-metrics --enable-gpu-metrics --enable-gpu --endpoint=http --endpoint=grpc --backend=custom --backend=ensemble --backend=identity --backend=repeat --backend=square --backend=onnxruntime:pull/${{ github.event.client_payload.pull_request.number }}/head ${{ github.event.client_payload.slash_command.args.named.buildargs }}") 21 | 22 | - name: Build QA Container 23 | if: github.event.client_payload.slash_command.args.named.skipbuildqa == '' 24 | run: (cd /home/runner && bash -x ./buildqa.sh) 25 | 26 | - name: Run Explicit Tests 27 | id: explicit 28 | if: github.event.client_payload.slash_command.args.named.tests != '' 29 | continue-on-error: true 30 | run: (cd /home/runner && 31 | bash -x ./test.sh --tests "${{ github.event.client_payload.slash_command.args.named.tests }}") 32 | 33 | - name: Run Sanity Tests 34 | id: sanity 35 | if: github.event.client_payload.slash_command.args.named.skipsanity == '' 36 | continue-on-error: true 37 | run: (cd /home/runner && 38 | bash -x ./test.sh --backends onnx --expected 38 --tests "L0_infer" && 39 | bash -x ./test.sh --backends onnx --tests "L0_batcher" && 40 | bash -x ./test.sh --backends onnx --tests "L0_sequence_batcher") 41 | 42 | - name: Show Result 43 | uses: peter-evans/create-or-update-comment@v1 44 | with: 45 | token: ${{ secrets.PAT }} 46 | repository: ${{ github.event.client_payload.github.payload.repository.full_name }} 47 | comment-id: ${{ github.event.client_payload.github.payload.comment.id }} 48 | body: | 49 | ``` 50 | sanity ${{ toJson(steps.sanity.outputs) }} 51 | explicit ${{ toJson(steps.explicit.outputs) }} 52 | ``` 53 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: pre-commit 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | pre-commit: 34 | runs-on: ubuntu-22.04 35 | steps: 36 | - uses: actions/checkout@v3 37 | - uses: actions/setup-python@v3 38 | - uses: pre-commit/action@v3.0.0 39 | 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /.vscode 3 | *.so 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
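# The same hooks can be run locally before pushing (standard pre-commit usage,
# not specific to this repo):
#   pip install pre-commit
#   pre-commit run --all-files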
26 | 27 | repos: 28 | - repo: https://github.com/timothycrosley/isort 29 | rev: 5.12.0 30 | hooks: 31 | - id: isort 32 | additional_dependencies: [toml] 33 | - repo: https://github.com/psf/black 34 | rev: 23.1.0 35 | hooks: 36 | - id: black 37 | types_or: [python, cython] 38 | - repo: https://github.com/PyCQA/flake8 39 | rev: 5.0.4 40 | hooks: 41 | - id: flake8 42 | args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501] 43 | types_or: [python, cython] 44 | - repo: https://github.com/pre-commit/mirrors-clang-format 45 | rev: v16.0.5 46 | hooks: 47 | - id: clang-format 48 | types_or: [c, c++, cuda, proto, textproto, java] 49 | args: ["-fallback-style=none", "-style=file", "-i"] 50 | - repo: https://github.com/codespell-project/codespell 51 | rev: v2.2.4 52 | hooks: 53 | - id: codespell 54 | additional_dependencies: [tomli] 55 | args: ["--toml", "pyproject.toml"] 56 | exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) 57 | # More details about these pre-commit hooks here: 58 | # https://pre-commit.com/hooks.html 59 | - repo: https://github.com/pre-commit/pre-commit-hooks 60 | rev: v4.4.0 61 | hooks: 62 | - id: check-case-conflict 63 | - id: check-executables-have-shebangs 64 | - id: check-merge-conflict 65 | - id: check-json 66 | - id: check-toml 67 | - id: check-yaml 68 | - id: check-shebang-scripts-are-executable 69 | - id: end-of-file-fixer 70 | types_or: [c, c++, cuda, proto, textproto, java, python] 71 | - id: mixed-line-ending 72 | - id: requirements-txt-fixer 73 | - id: trailing-whitespace 74 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | cmake_minimum_required(VERSION 3.17) 28 | 29 | project(tritononnxruntimebackend LANGUAGES C CXX) 30 | 31 | # 32 | # Options 33 | # 34 | # To build the ONNX Runtime backend you must either: 35 | # 36 | # - Point to an already built ONNX Runtime using 37 | # TRITON_ONNXRUNTIME_INCLUDE_PATHS and 38 | # TRITON_ONNXRUNTIME_LIB_PATHS 39 | # 40 | # or: 41 | # 42 | # - Set TRITON_BUILD_ONNXRUNTIME_VERSION to the version of ONNX 43 | # Runtime that you want built for the backend. 44 | # 45 | # - Set TRITON_BUILD_CONTAINER to the Triton container to use as a 46 | # base for the build. On Linux you can instead set 47 | # TRITON_BUILD_CONTAINER_VERSION to the Triton version that you 48 | # want to target with the build, and the corresponding container 49 | # from NGC will be used. 50 | # 51 | # - Optionally set TRITON_BUILD_CUDA_VERSION and 52 | # TRITON_BUILD_CUDA_HOME. If not set, these are automatically set 53 | # using the standard CUDA install location. For example, on 54 | # Windows they will be set based on CUDA_PATH: 55 | # 56 | # 57 | # TRITON_BUILD_CUDA_VERSION=11.1 58 | # TRITON_BUILD_CUDA_HOME="C:\Program Files\NVIDIA GPU Computing Toolkit\v11.1" 59 | # 60 | # - If you want TensorRT support, set 61 | # TRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON and set TRITON_BUILD_TENSORRT_HOME. 62 | # 63 | # Optionally set TRITON_ONNX_TENSORRT_REPO_TAG to specify a branch in the https://github.com/onnx/onnx-tensorrt repo, 64 | # for example: 65 | # TRITON_ONNX_TENSORRT_REPO_TAG=master 66 | # This enables using a version of TensorRT which is not yet supported in the ONNXRuntime release branch. 67 | # By default we pick the default branch which comes with the requested version of onnxruntime. 68 | # 69 | # Optionally set TRT_VERSION to specify the version of TRT which is being used. 70 | # This, along with TRITON_BUILD_ONNXRUNTIME_VERSION, is used to pick the right onnx-tensorrt parser version. 71 | # When TRITON_ONNX_TENSORRT_REPO_TAG is set, TRT_VERSION is ignored. 72 | # When neither TRITON_ONNX_TENSORRT_REPO_TAG nor TRT_VERSION is set, 73 | # the default parser version which comes with ORT is picked. 74 | # 75 | # - If you want OpenVINO support, set 76 | # TRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON and set 77 | # TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION to the OpenVINO 78 | # version that is compatible with the specified version of ONNX 79 | # Runtime. 80 | # 81 | # - Optionally set TRITON_BUILD_TARGET_PLATFORM to either linux, windows or 82 | # igpu. If not set, the current platform will be used. If building on 83 | # Jetpack, always set to igpu to avoid misdetection. 84 | # 85 | # - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF. 86 | # This will cause builds with CUDA and TensorRT flags to fail.
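# A typical configure line combining the options above might look like the
# following (the versions shown are illustrative; use the combination listed
# in the TRITON_VERSION_MAP of the server repo's build.py for your release):
#
#   cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
#         -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 \
#         -DTRITON_BUILD_CONTAINER_VERSION=23.04 \
#         -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON ..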
87 | # 88 | option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON) 89 | option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON) 90 | option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT 91 | "Enable TensorRT execution provider for ONNXRuntime backend in server" OFF) 92 | option(TRITON_ENABLE_ONNXRUNTIME_OPENVINO 93 | "Enable OpenVINO execution provider for ONNXRuntime backend in server" OFF) 94 | set(TRITON_BUILD_CONTAINER "" CACHE STRING "Triton container to use a base for build") 95 | set(TRITON_BUILD_CONTAINER_VERSION "" CACHE STRING "Triton container version to target") 96 | set(TRITON_BUILD_ONNXRUNTIME_VERSION "" CACHE STRING "ONNXRuntime version to build") 97 | set(TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION "" CACHE STRING "ONNXRuntime OpenVINO version to build") 98 | set(TRITON_BUILD_TARGET_PLATFORM "" CACHE STRING "Target platform for ONNXRuntime build") 99 | set(TRITON_BUILD_CUDA_VERSION "" CACHE STRING "Version of CUDA install") 100 | set(TRITON_BUILD_CUDA_HOME "" CACHE PATH "Path to CUDA install") 101 | set(TRITON_BUILD_CUDNN_HOME "" CACHE PATH "Path to CUDNN install") 102 | set(TRITON_BUILD_TENSORRT_HOME "" CACHE PATH "Path to TensorRT install") 103 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "" CACHE PATH "Paths to ONNXRuntime includes") 104 | set(TRITON_ONNX_TENSORRT_REPO_TAG "" CACHE STRING "Tag for onnx-tensorrt repo") 105 | set(TRT_VERSION "" CACHE STRING "TRT version for this build.") 106 | set(TRITON_ONNXRUNTIME_LIB_PATHS "" CACHE PATH "Paths to ONNXRuntime libraries") 107 | 108 | set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from") 109 | set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") 110 | set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") 111 | set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") 112 | 113 | # Use C++17 standard as Triton's minimum required. 
114 | set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.") 115 | 116 | if (WIN32) 117 | if(TRITON_ENABLE_ONNXRUNTIME_OPENVINO) 118 | message(FATAL_ERROR 119 | "TRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON not supported for Windows") 120 | endif() 121 | endif() # WIN32 122 | 123 | if (NOT TRITON_ENABLE_GPU) 124 | if (TRITON_ENABLE_ONNXRUNTIME_TENSORRT) 125 | message(FATAL_ERROR "TRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON requires TRITON_ENABLE_GPU=ON") 126 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 127 | endif() # NOT TRITON_ENABLE_GPU 128 | 129 | if(NOT CMAKE_BUILD_TYPE) 130 | set(CMAKE_BUILD_TYPE Release) 131 | endif() 132 | 133 | set(TRITON_ONNXRUNTIME_DOCKER_BUILD OFF) 134 | # Download onnxruntime 135 | include(cmake/download_onnxruntime.cmake) 136 | if(TRITON_ONNXRUNTIME_LIB_PATHS STREQUAL "") 137 | set(TRITON_ONNXRUNTIME_DOCKER_BUILD ON) 138 | endif() 139 | 140 | message(STATUS "Using Onnxruntime docker: ${TRITON_ONNXRUNTIME_DOCKER_BUILD}") 141 | 142 | if(NOT TRITON_ONNXRUNTIME_DOCKER_BUILD) 143 | find_library(ONNXRUNTIME_LIBRARY NAMES onnxruntime PATHS ${TRITON_ONNXRUNTIME_LIB_PATHS} REQUIRED) 144 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 145 | find_library(OV_LIBRARY 146 | NAMES openvino 147 | PATHS ${TRITON_ONNXRUNTIME_LIB_PATHS}) 148 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 149 | 150 | else() 151 | 152 | if(NOT TRITON_BUILD_CONTAINER AND NOT TRITON_BUILD_CONTAINER_VERSION) 153 | message(FATAL_ERROR 154 | "TRITON_BUILD_ONNXRUNTIME_VERSION requires TRITON_BUILD_CONTAINER or TRITON_BUILD_CONTAINER_VERSION") 155 | endif() 156 | 157 | if(NOT TRITON_BUILD_CONTAINER) 158 | set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") 159 | endif() 160 | 161 | set(TRITON_ONNXRUNTIME_DOCKER_IMAGE "tritonserver_onnxruntime") 162 | set(TRITON_ONNXRUNTIME_DOCKER_MEMORY "$,32g,8g>") 163 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/include") 164 | set(TRITON_ONNXRUNTIME_LIB_PATHS "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/lib") 165 | if (WIN32) 166 | set(ONNXRUNTIME_LIBRARY "onnxruntime") 167 | else() 168 | set(ONNXRUNTIME_LIBRARY "libonnxruntime.so") 169 | endif() # WIN32 170 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 171 | set(OV_LIBRARY "libopenvino.so") 172 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 173 | endif() 174 | 175 | # 176 | # Dependencies 177 | # 178 | # FetchContent's composability isn't very good. We must include the 179 | # transitive closure of all repos so that we can override the tag. 
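# For example, a build can pin all three dependency repos to the same release
# branch instead of "main" (the branch name below is illustrative):
#
#   cmake -DTRITON_BACKEND_REPO_TAG=r23.04 \
#         -DTRITON_CORE_REPO_TAG=r23.04 \
#         -DTRITON_COMMON_REPO_TAG=r23.04 ..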
180 | # 181 | include(FetchContent) 182 | 183 | FetchContent_Declare( 184 | repo-common 185 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/common.git 186 | GIT_TAG ${TRITON_COMMON_REPO_TAG} 187 | GIT_SHALLOW ON 188 | ) 189 | FetchContent_Declare( 190 | repo-core 191 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git 192 | GIT_TAG ${TRITON_CORE_REPO_TAG} 193 | GIT_SHALLOW ON 194 | ) 195 | FetchContent_Declare( 196 | repo-backend 197 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/backend.git 198 | GIT_TAG ${TRITON_BACKEND_REPO_TAG} 199 | GIT_SHALLOW ON 200 | ) 201 | FetchContent_MakeAvailable(repo-common repo-core repo-backend) 202 | 203 | # 204 | # CUDA 205 | # 206 | if(${TRITON_ENABLE_GPU}) 207 | find_package(CUDAToolkit REQUIRED) 208 | endif() # TRITON_ENABLE_GPU 209 | 210 | # 211 | # Shared library implementing the Triton Backend API 212 | # 213 | configure_file(src/libtriton_onnxruntime.ldscript libtriton_onnxruntime.ldscript COPYONLY) 214 | 215 | add_library( 216 | triton-onnxruntime-backend SHARED 217 | src/onnxruntime.cc 218 | src/onnxruntime_loader.cc 219 | src/onnxruntime_loader.h 220 | src/onnxruntime_utils.cc 221 | src/onnxruntime_utils.h 222 | ) 223 | 224 | add_library( 225 | TritonOnnxRuntimeBackend::triton-onnxruntime-backend ALIAS triton-onnxruntime-backend 226 | ) 227 | 228 | target_include_directories( 229 | triton-onnxruntime-backend 230 | PRIVATE 231 | ${CMAKE_CURRENT_SOURCE_DIR}/src 232 | ${TRITON_ONNXRUNTIME_INCLUDE_PATHS} 233 | ) 234 | 235 | target_compile_features(triton-onnxruntime-backend PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD}) 236 | target_compile_options( 237 | triton-onnxruntime-backend PRIVATE 238 | $<$,$,$>: 239 | -Wall -Wextra -Wno-unused-parameter -Wno-type-limits> 240 | $<$:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor> 241 | ) 242 | 243 | if(${TRITON_ENABLE_GPU}) 244 | target_compile_definitions( 245 | triton-onnxruntime-backend 246 | PRIVATE TRITON_ENABLE_GPU=1 247 | ) 248 | endif() # TRITON_ENABLE_GPU 249 | if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) 250 | target_compile_definitions( 251 | triton-onnxruntime-backend 252 | PRIVATE TRITON_ENABLE_ONNXRUNTIME_TENSORRT=1 253 | ) 254 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 255 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 256 | target_compile_definitions( 257 | triton-onnxruntime-backend 258 | PRIVATE TRITON_ENABLE_ONNXRUNTIME_OPENVINO=1 259 | ) 260 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 261 | 262 | if (WIN32) 263 | set_target_properties( 264 | triton-onnxruntime-backend 265 | PROPERTIES 266 | POSITION_INDEPENDENT_CODE ON 267 | OUTPUT_NAME triton_onnxruntime 268 | SKIP_BUILD_RPATH TRUE 269 | BUILD_WITH_INSTALL_RPATH TRUE 270 | INSTALL_RPATH_USE_LINK_PATH FALSE 271 | INSTALL_RPATH "$\{ORIGIN\}" 272 | ) 273 | else () 274 | set_target_properties( 275 | triton-onnxruntime-backend 276 | PROPERTIES 277 | POSITION_INDEPENDENT_CODE ON 278 | OUTPUT_NAME triton_onnxruntime 279 | SKIP_BUILD_RPATH TRUE 280 | BUILD_WITH_INSTALL_RPATH TRUE 281 | INSTALL_RPATH_USE_LINK_PATH FALSE 282 | INSTALL_RPATH "$\{ORIGIN\}" 283 | LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_onnxruntime.ldscript 284 | LINK_FLAGS "-Wl,--version-script libtriton_onnxruntime.ldscript" 285 | ) 286 | endif() 287 | 288 | FOREACH(p ${TRITON_ONNXRUNTIME_LIB_PATHS}) 289 | target_link_directories( 290 | triton-onnxruntime-backend 291 | PRIVATE ${p} 292 | ) 293 | ENDFOREACH(p) 294 | 295 | target_link_libraries( 296 | triton-onnxruntime-backend 297 | PRIVATE 298 | triton-core-serverapi # from repo-core 299 | triton-core-backendapi # from 
repo-core 300 | triton-core-serverstub # from repo-core 301 | triton-backend-utils # from repo-backend 302 | ${TRITON_ONNXRUNTIME_LDFLAGS} 303 | ${ONNXRUNTIME_LIBRARY} 304 | ) 305 | 306 | if(${TRITON_ENABLE_GPU}) 307 | target_link_libraries( 308 | triton-onnxruntime-backend 309 | PRIVATE 310 | CUDA::cudart 311 | ) 312 | endif() # TRITON_ENABLE_GPU 313 | 314 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 315 | target_link_libraries( 316 | triton-onnxruntime-backend 317 | PRIVATE 318 | ${OV_LIBRARY} 319 | ) 320 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 321 | 322 | # 323 | # Build the ONNX Runtime libraries using docker. 324 | # 325 | if(TRITON_ONNXRUNTIME_DOCKER_BUILD) 326 | set(_GEN_FLAGS "") 327 | if(NOT ${TRITON_BUILD_TARGET_PLATFORM} STREQUAL "") 328 | set(_GEN_FLAGS ${_GEN_FLAGS} "--target-platform=${TRITON_BUILD_TARGET_PLATFORM}") 329 | endif() # TRITON_BUILD_TARGET_PLATFORM 330 | if(NOT ${TRITON_BUILD_CUDA_VERSION} STREQUAL "") 331 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cuda-version=${TRITON_BUILD_CUDA_VERSION}") 332 | endif() # TRITON_BUILD_CUDA_VERSION 333 | if(NOT ${TRITON_BUILD_CUDA_HOME} STREQUAL "") 334 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cuda-home=${TRITON_BUILD_CUDA_HOME}") 335 | endif() # TRITON_BUILD_CUDA_HOME 336 | if(NOT ${TRITON_BUILD_CUDNN_HOME} STREQUAL "") 337 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cudnn-home=${TRITON_BUILD_CUDNN_HOME}") 338 | endif() # TRITON_BUILD_CUDNN_HOME 339 | if(NOT ${TRITON_BUILD_TENSORRT_HOME} STREQUAL "") 340 | set(_GEN_FLAGS ${_GEN_FLAGS} "--tensorrt-home=${TRITON_BUILD_TENSORRT_HOME}") 341 | endif() # TRITON_BUILD_TENSORRT_HOME 342 | if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) 343 | set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt") 344 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 345 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 346 | set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}") 347 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 348 | 349 | set(ENABLE_GPU_EXTRA_ARGS "") 350 | if(${TRITON_ENABLE_GPU}) 351 | set(ENABLE_GPU_EXTRA_ARGS "--enable-gpu") 352 | endif() # TRITON_ENABLE_GPU 353 | 354 | if (WIN32) 355 | add_custom_command( 356 | OUTPUT 357 | onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 358 | COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} 359 | COMMAND docker build --memory ${TRITON_ONNXRUNTIME_DOCKER_MEMORY} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} 360 | COMMAND powershell.exe -noprofile -c "docker rm onnxruntime_backend_ort > $null 2>&1; if ($LASTEXITCODE) { 'error ignored...' 
}; exit 0" 361 | COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} 362 | COMMAND rmdir /s/q onnxruntime 363 | COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime 364 | COMMAND docker rm onnxruntime_backend_ort 365 | COMMENT "Building ONNX Runtime" 366 | ) 367 | else() 368 | add_custom_command( 369 | OUTPUT 370 | onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 371 | COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} 372 | COMMAND docker build --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} 373 | COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' || true 374 | COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} 375 | COMMAND rm -fr onnxruntime 376 | COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime 377 | COMMAND docker rm onnxruntime_backend_ort 378 | COMMENT "Building ONNX Runtime" 379 | ) 380 | endif() # WIN32 381 | 382 | add_custom_target(ort_target DEPENDS onnxruntime/lib/${ONNXRUNTIME_LIBRARY}) 383 | add_library(onnxruntime-library SHARED IMPORTED GLOBAL) 384 | add_dependencies(onnxruntime-library ort_target) 385 | add_dependencies(triton-onnxruntime-backend onnxruntime-library) 386 | 387 | if (WIN32) 388 | set_target_properties( 389 | onnxruntime-library 390 | PROPERTIES 391 | IMPORTED_LOCATION onnxruntime/bin/${ONNXRUNTIME_LIBRARY} 392 | ) 393 | else() 394 | set_target_properties( 395 | onnxruntime-library 396 | PROPERTIES 397 | IMPORTED_LOCATION onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 398 | ) 399 | endif() # WIN32 400 | endif() # TRITON_ONNXRUNTIME_DOCKER_BUILD 401 | 402 | # 403 | # Install 404 | # 405 | include(GNUInstallDirs) 406 | set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonOnnxRuntimeBackend) 407 | 408 | install( 409 | TARGETS 410 | triton-onnxruntime-backend 411 | EXPORT 412 | triton-onnxruntime-backend-targets 413 | LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 414 | RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 415 | ) 416 | 417 | # For Jetson, we build the onnxruntime backend once and re-use 418 | # that tar file. We copy over the libraries and other requirements 419 | # prior to running this build and therefore these set of install 420 | # commands are not needed. 
421 | if(TRITON_ONNXRUNTIME_DOCKER_BUILD OR DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 422 | install( 423 | DIRECTORY 424 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/ 425 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 426 | PATTERN *lib EXCLUDE 427 | PATTERN *bin EXCLUDE 428 | PATTERN *include EXCLUDE 429 | PATTERN *test EXCLUDE 430 | ) 431 | 432 | install( 433 | DIRECTORY 434 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/bin/ 435 | USE_SOURCE_PERMISSIONS 436 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 437 | ) 438 | 439 | if (NOT WIN32) 440 | install( 441 | DIRECTORY 442 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/lib/ 443 | USE_SOURCE_PERMISSIONS 444 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 445 | ) 446 | 447 | install( 448 | DIRECTORY 449 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/test 450 | USE_SOURCE_PERMISSIONS 451 | DESTINATION ${CMAKE_INSTALL_PREFIX} 452 | ) 453 | endif() # NOT WIN32 454 | endif() # TRITON_ONNXRUNTIME_DOCKER_BUILD 455 | 456 | install( 457 | EXPORT 458 | triton-onnxruntime-backend-targets 459 | FILE 460 | TritonOnnxRuntimeBackendTargets.cmake 461 | NAMESPACE 462 | TritonOnnxRuntimeBackend:: 463 | DESTINATION 464 | ${INSTALL_CONFIGDIR} 465 | ) 466 | 467 | include(CMakePackageConfigHelpers) 468 | configure_package_config_file( 469 | ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonOnnxRuntimeBackendConfig.cmake.in 470 | ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendConfig.cmake 471 | INSTALL_DESTINATION ${INSTALL_CONFIGDIR} 472 | ) 473 | 474 | install( 475 | FILES 476 | ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendConfig.cmake 477 | DESTINATION ${INSTALL_CONFIGDIR} 478 | ) 479 | 480 | # 481 | # Export from build tree 482 | # 483 | export( 484 | EXPORT triton-onnxruntime-backend-targets 485 | FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendTargets.cmake 486 | NAMESPACE TritonOnnxRuntimeBackend:: 487 | ) 488 | 489 | export(PACKAGE TritonOnnxRuntimeBackend) 490 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of NVIDIA CORPORATION nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) 30 | 31 | # ONNX Runtime Backend 32 | 33 | The Triton backend for the [ONNX 34 | Runtime](https://github.com/microsoft/onnxruntime). You can learn more 35 | about Triton backends in the [backend 36 | repo](https://github.com/triton-inference-server/backend). Ask 37 | questions or report problems on the [issues 38 | page](https://github.com/triton-inference-server/onnxruntime_backend/issues). 39 | 40 | Use a recent CMake to build and install in a local directory. 41 | Typically you will want to build an appropriate ONNX Runtime 42 | implementation as part of the build. You do this by specifying an ONNX 43 | Runtime version and a Triton container version that you want to use 44 | with the backend. You can find the combination of versions used in a 45 | particular Triton release in the TRITON_VERSION_MAP at the top of 46 | build.py in the branch matching the Triton release you are interested 47 | in. For example, to build the ONNX Runtime backend for Triton 23.04, 48 | use the versions from TRITON_VERSION_MAP in the [r23.04 branch of 49 | build.py](https://github.com/triton-inference-server/server/blob/r23.04/build.py#L73). 50 | 51 | ``` 52 | $ mkdir build 53 | $ cd build 54 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 -DTRITON_BUILD_CONTAINER_VERSION=23.04 .. 55 | $ make install 56 | ``` 57 | 58 | The resulting install/backends/onnxruntime directory can be added to a 59 | Triton installation as /opt/tritonserver/backends/onnxruntime. 60 | 61 | The following required Triton repositories will be pulled and used in 62 | the build. By default the "main" branch/tag will be used for each repo 63 | but the listed CMake argument can be used to override. 64 | 65 | * triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag] 66 | * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] 67 | * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] 68 | 69 | You can add TensorRT support to the ONNX Runtime backend by using 70 | -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON. You can add OpenVino support 71 | by using -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON 72 | -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=\<openvino-version\>, where 73 | \<openvino-version\> is the OpenVino version to use and should match the 74 | TRITON_VERSION_MAP entry as described above. So, to build with both 75 | TensorRT and OpenVino support: 76 | 77 | ``` 78 | $ mkdir build 79 | $ cd build 80 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 -DTRITON_BUILD_CONTAINER_VERSION=23.04 -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 ..
81 | $ make install 82 | ``` 83 | 84 | 85 | ## ONNX Runtime with TensorRT optimization 86 | TensorRT can be used in conjunction with an ONNX model to further optimize 87 | performance. To enable TensorRT optimization you must set the model configuration 88 | appropriately. There are several optimizations available for TensorRT, like 89 | selection of the compute precision and workspace size. The optimization 90 | parameters and their descriptions are as follows. 91 | 92 | 93 | * `precision_mode`: The precision used for optimization. Allowed values are "FP32", "FP16" and "INT8". The default value is "FP32". 94 | * `max_workspace_size_bytes`: The maximum GPU memory the model can use temporarily during execution. The default value is 1GB. 95 | * `int8_calibration_table_name`: Specifies the INT8 calibration table name. Applicable when precision_mode=="INT8" and the model does not contain Q/DQ nodes. If a calibration table is provided for a model with Q/DQ nodes, ORT session creation will fail. 96 | * `int8_use_native_calibration_table`: Calibration table to use. Allowed values are 1 (use the native TensorRT-generated calibration table) and 0 (use the ORT-generated calibration table). The default is 0. **Note: The latest calibration table file needs to be copied to trt_engine_cache_path before inference. Calibration tables are specific to the model and calibration data set, so whenever a new calibration table is generated, the old file in the path should be cleaned up or replaced.** 97 | * `trt_engine_cache_enable`: Enables engine caching. 98 | * `trt_engine_cache_path`: Specifies the engine cache path. 99 | 100 | To explore the usage of more parameters, follow the mapping table below and 101 | check the [ONNX Runtime doc](https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#execution-provider-options) for details. 102 | 103 | > Please link to the latest ONNX Runtime binaries in CMake or build from the 104 | [main branch of ONNX Runtime](https://github.com/microsoft/onnxruntime/tree/main) to enable the latest options.
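As a quick illustration of the parameters above, a model configuration that enables FP16 precision together with engine caching might include an accelerator block like the one below (the cache path is a placeholder; the full parameter mapping and a more complete example follow):

```
optimization { execution_accelerators {
  gpu_execution_accelerator : [ {
    name : "tensorrt"
    parameters { key: "precision_mode" value: "FP16" }
    parameters { key: "trt_engine_cache_enable" value: "true" }
    parameters { key: "trt_engine_cache_path" value: "/tmp/trt_cache" }}
  ]
}}
```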
105 | 106 | ### Parameter mapping between ONNX Runtime and Triton ONNXRuntime Backend 107 | 108 | | Key in Triton model configuration | Value in Triton model config | Corresponding TensorRT EP option in ONNX Runtime | Type | 109 | | --------------------------------- | --------------------------------------------------- | :----------------------------------------------- | :----- | 110 | | max_workspace_size_bytes | e.g: "4294967296" | trt_max_workspace_size | int | 111 | | trt_max_partition_iterations | e.g: "1000" | trt_max_partition_iterations | int | 112 | | trt_min_subgraph_size | e.g: "1" | trt_min_subgraph_size | int | 113 | | precision_mode | "FP16" | trt_fp16_enable | bool | 114 | | precision_mode | "INT8" | trt_int8_enable | bool | 115 | | int8_calibration_table_name | | trt_int8_calibration_table_name | string | 116 | | int8_use_native_calibration_table | e.g: "1" or "true", "0" or "false" | trt_int8_use_native_calibration_table | bool | 117 | | trt_dla_enable | | trt_dla_enable | bool | 118 | | trt_dla_core | e.g: "0" | trt_dla_core | int | 119 | | trt_engine_cache_enable | e.g: "1" or "true", "0" or "false" | trt_engine_cache_enable | bool | 120 | | trt_engine_cache_path | | trt_engine_cache_path | string | 121 | | trt_engine_cache_prefix | | trt_engine_cache_prefix | string | 122 | | trt_dump_subgraphs | e.g: "1" or "true", "0" or "false" | trt_dump_subgraphs | bool | 123 | | trt_force_sequential_engine_build | e.g: "1" or "true", "0" or "false" | trt_force_sequential_engine_build | bool | 124 | | trt_context_memory_sharing_enable | e.g: "1" or "true", "0" or "false" | trt_context_memory_sharing_enable | bool | 125 | | trt_layer_norm_fp32_fallback | e.g: "1" or "true", "0" or "false" | trt_layer_norm_fp32_fallback | bool | 126 | | trt_timing_cache_enable | e.g: "1" or "true", "0" or "false" | trt_timing_cache_enable | bool | 127 | | trt_timing_cache_path | | trt_timing_cache_path | string | 128 | | trt_force_timing_cache | e.g: "1" or "true", "0" or "false" | trt_force_timing_cache | bool | 129 | | trt_detailed_build_log | e.g: "1" or "true", "0" or "false" | trt_detailed_build_log | bool | 130 | | trt_build_heuristics_enable | e.g: "1" or "true", "0" or "false" | trt_build_heuristics_enable | bool | 131 | | trt_sparsity_enable | e.g: "1" or "true", "0" or "false" | trt_sparsity_enable | bool | 132 | | trt_builder_optimization_level | e.g: "3" | trt_builder_optimization_level | int | 133 | | trt_auxiliary_streams | e.g: "-1" | trt_auxiliary_streams | int | 134 | | trt_tactic_sources | e.g: "-CUDNN,+CUBLAS"; | trt_tactic_sources | string | 135 | | trt_extra_plugin_lib_paths | | trt_extra_plugin_lib_paths | string | 136 | | trt_profile_min_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_min_shapes | string | 137 | | trt_profile_max_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_max_shapes | string | 138 | | trt_profile_opt_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_opt_shapes | string | 139 | | trt_cuda_graph_enable | e.g: "1" or "true", "0" or "false" | trt_cuda_graph_enable | bool | 140 | | trt_dump_ep_context_model | e.g: "1" or "true", "0" or "false" | trt_dump_ep_context_model | bool | 141 | | trt_ep_context_file_path | | trt_ep_context_file_path | string | 142 | | trt_ep_context_embed_mode | e.g: "1" | trt_ep_context_embed_mode | int | 143 | 144 | The section of model config file specifying these parameters will look like: 145 | 146 | ``` 147 | . 148 | . 149 | . 
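# Each "parameters" entry is a string key/value pair; the backend maps the key
# to the corresponding TensorRT EP option listed in the table above.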
150 | optimization { execution_accelerators { 151 | gpu_execution_accelerator : [ { 152 | name : "tensorrt" 153 | parameters { key: "precision_mode" value: "FP16" } 154 | parameters { key: "max_workspace_size_bytes" value: "1073741824" } 155 | parameters { key: "trt_engine_cache_enable" value: "1" }} 156 | ] 157 | }} 158 | . 159 | . 160 | . 161 | ``` 162 | 163 | ## ONNX Runtime with CUDA Execution Provider optimization 164 | When the GPU is enabled for ORT, the CUDA execution provider is enabled. If TensorRT is 165 | also enabled then the CUDA EP is treated as a fallback option (it only comes into 166 | play for nodes which TensorRT cannot execute). If TensorRT is not enabled 167 | then the CUDA EP is the primary EP which executes the models. ORT exposes 168 | configuration options for the CUDA EP to further optimize based on the specific model 169 | and user scenario. There are several optimizations available; please refer to 170 | the [ONNX Runtime doc](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider) 171 | for more details. To enable CUDA EP optimization you must set the model 172 | configuration appropriately: 173 | 174 | ``` 175 | optimization { execution_accelerators { 176 | gpu_execution_accelerator : [ { 177 | name : "cuda" 178 | parameters { key: "cudnn_conv_use_max_workspace" value: "0" } 179 | parameters { key: "use_ep_level_unified_stream" value: "1" }} 180 | ] 181 | }} 182 | ``` 183 | 184 | ### Deprecated Parameters 185 | Specifying these parameters as shown below is deprecated. For 186 | backward compatibility they are still supported, but please use the 187 | method above to specify the parameters. 188 | 189 | * `cudnn_conv_algo_search`: CUDA convolution algorithm search configuration. 190 | Available options are: 0 - EXHAUSTIVE (expensive exhaustive benchmarking using 191 | cudnnFindConvolutionForwardAlgorithmEx; this is the default option), 192 | 1 - HEURISTIC (lightweight heuristic-based search using 193 | cudnnGetConvolutionForwardAlgorithm_v7), 2 - DEFAULT (default algorithm using 194 | CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM). 195 | 196 | * `gpu_mem_limit`: CUDA memory limit. To use all possible memory, pass in the maximum 197 | size_t value. Defaults to SIZE_MAX. 198 | 199 | * `arena_extend_strategy`: Strategy used to grow the memory arena. Available 200 | options are: 0 = kNextPowerOfTwo, 1 = kSameAsRequested. Defaults to 0. 201 | 202 | * `do_copy_in_default_stream`: Flag indicating if copying needs to take place on 203 | the same stream as the compute stream in the CUDA EP. Available options are: 204 | 0 = Use separate streams for copying and compute, 1 = Use the same stream for 205 | copying and compute. Defaults to 1. 206 | 207 | In the model config file, specifying these parameters will look like: 208 | 209 | ``` 210 | . 211 | . 212 | . 213 | parameters { key: "cudnn_conv_algo_search" value: { string_value: "0" } } 214 | parameters { key: "gpu_mem_limit" value: { string_value: "4294967200" } } 215 | . 216 | . 217 | . 218 | 219 | ``` 220 | 221 | 222 | ## ONNX Runtime with OpenVINO optimization 223 | 224 | [OpenVINO](https://docs.openvinotoolkit.org/latest/index.html) can be 225 | used in conjunction with an ONNX model to further optimize 226 | performance. To enable OpenVINO optimization you must set the model 227 | configuration as shown below. 228 | 229 | ``` 230 | . 231 | . 232 | .
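# The OpenVINO EP is configured as a cpu_execution_accelerator entry, unlike
# the TensorRT and CUDA examples above, which use gpu_execution_accelerator.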
233 | optimization { execution_accelerators { 234 | cpu_execution_accelerator : [ { 235 | name : "openvino" 236 | } ] 237 | }} 238 | . 239 | . 240 | . 241 | ``` 242 | 243 | ## Other Optimization Options with ONNX Runtime 244 | 245 | Details regarding when to use these options and what to expect from them can be 246 | found [here](https://onnxruntime.ai/docs/performance/tune-performance.html). 247 | 248 | ### Model Config Options 249 | * `intra_op_thread_count`: Sets the number of threads used to parallelize the 250 | execution within nodes. A value of 0 means ORT will pick a default, which is 251 | the number of cores. 252 | * `inter_op_thread_count`: Sets the number of threads used to parallelize the 253 | execution of the graph (across nodes). If sequential execution is enabled this 254 | value is ignored. 255 | A value of 0 means ORT will pick a default, which is the number of cores. 256 | * `execution_mode`: Controls whether operators in the graph are executed 257 | sequentially or in parallel. Usually when the model has many branches, setting 258 | this option to 1, i.e. "parallel", will give better performance. The default is 259 | 0, i.e. "sequential execution." 260 | * `level`: Refers to the graph optimization level. By default all optimizations 261 | are enabled. Allowed values are -1, 1 and 2. -1 refers to BASIC optimizations, 262 | 1 refers to basic plus extended optimizations like fusions, and 2 refers to all 263 | optimizations being disabled. Please find the details 264 | [here](https://onnxruntime.ai/docs/performance/graph-optimizations.html). 265 | 266 | ``` 267 | optimization { 268 | graph : { 269 | level : 1 270 | }} 271 | 272 | parameters { key: "intra_op_thread_count" value: { string_value: "0" } } 273 | parameters { key: "execution_mode" value: { string_value: "0" } } 274 | parameters { key: "inter_op_thread_count" value: { string_value: "0" } } 275 | 276 | ``` 277 | * `enable_mem_arena`: Use 1 to enable the arena and 0 to disable. See 278 | [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a0bbd62df2b3c119636fba89192240593) 279 | for more information. 280 | * `enable_mem_pattern`: Use 1 to enable memory pattern and 0 to disable. 281 | See [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#ad13b711736956bf0565fea0f8d7a5d75) 282 | for more information. 283 | * `memory.enable_memory_arena_shrinkage`: 284 | See [this](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h) 285 | for more information. 286 | * `session.use_device_allocator_for_initializers`: Use "1" to enable using the device allocator for allocating initialized tensor memory and "0" to disable. The default is "0". See [this](https://onnxruntime.ai/docs/get-started/with-c.html) for more information. 287 | 288 | ### Command line options 289 | 290 | #### Thread Pools 291 | 292 | When the intra and inter op thread counts are set to 0 or to a value higher than 1, 293 | ORT by default creates a threadpool per session. This may not be ideal in every scenario, 294 | so ORT also supports global threadpools. When global threadpools are 295 | enabled, ORT creates one global threadpool which is shared by every session. 296 | Use the backend config to enable the global threadpool. When the global threadpool is 297 | enabled, the intra and inter op num threads config should also be provided via the 298 | backend config; config values provided in the model config will be ignored.
299 | 300 | ``` 301 | --backend-config=onnxruntime,enable-global-threadpool=<0,1>, --backend-config=onnxruntime,intra_op_thread_count=<int>, --backend-config=onnxruntime,inter_op_thread_count=<int> 302 | ``` 303 | 304 | #### Default Max Batch Size 305 | 306 | The default-max-batch-size value is used for max_batch_size during 307 | [Autocomplete](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#auto-generated-model-configuration) 308 | when no other value is found. Assuming the server was not launched with the 309 | `--disable-auto-complete-config` command-line option, the onnxruntime backend 310 | will set the max_batch_size of the model to this default value under the 311 | following conditions: 312 | 313 | 1. Autocomplete has determined the model is capable of batching requests. 314 | 2. max_batch_size is 0 in the model configuration or max_batch_size 315 | is omitted from the model configuration. 316 | 317 | If max_batch_size > 1 and no 318 | [scheduler](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#scheduling-and-batching) 319 | is provided, the dynamic batch scheduler will be used. 320 | 321 | ``` 322 | --backend-config=onnxruntime,default-max-batch-size=<int> 323 | ``` 324 | 325 | The default value of `default-max-batch-size` is 4. 326 | -------------------------------------------------------------------------------- /cmake/TritonOnnxRuntimeBackendConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED.
26 | 27 | include(CMakeFindDependencyMacro) 28 | 29 | get_filename_component( 30 | TRITONONNXRUNTIMEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH 31 | ) 32 | 33 | list(APPEND CMAKE_MODULE_PATH ${TRITONONNXRUNTIMEBACKEND_CMAKE_DIR}) 34 | 35 | if(NOT TARGET TritonOnnxRuntimeBackend::triton-onnxruntime-backend) 36 | include("${TRITONONNXRUNTIMEBACKEND_CMAKE_DIR}/TritonOnnxRuntimeBackendTargets.cmake") 37 | endif() 38 | 39 | set(TRITONONNXRUNTIMEBACKEND_LIBRARIES TritonOnnxRuntimeBackend::triton-onnxruntime-backend) 40 | -------------------------------------------------------------------------------- /cmake/download_onnxruntime.cmake: -------------------------------------------------------------------------------- 1 | if(DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 2 | 3 | set(DOWNLOAD_PATH "${CMAKE_BINARY_DIR}/_deps/downloads/onnxruntime.zip") 4 | set(EXTRACT_DIR "${CMAKE_BINARY_DIR}/onnxruntime") 5 | 6 | message(NOTICE "Downloading onnxruntime: ${TRITON_ONNXRUNTIME_PACKAGE_URL}") 7 | 8 | file(DOWNLOAD ${TRITON_ONNXRUNTIME_PACKAGE_URL} ${DOWNLOAD_PATH} SHOW_PROGRESS STATUS DOWNLOAD_STATUS) 9 | 10 | # file(DOWNLOAD ... STATUS DOWNLOAD_STATUS) returns a list with 2 elements 11 | list(GET DOWNLOAD_STATUS 0 DOWNLOAD_RESULT) 12 | 13 | if(NOT DOWNLOAD_RESULT EQUAL 0) 14 | message(NOTICE "Failed to download: ${TRITON_ONNXRUNTIME_PACKAGE_URL}") 15 | else() 16 | message(NOTICE "Download successful: ${DOWNLOAD_PATH}" ) 17 | 18 | file(ARCHIVE_EXTRACT INPUT ${DOWNLOAD_PATH} DESTINATION ${EXTRACT_DIR} VERBOSE ) 19 | 20 | file(READ "${EXTRACT_DIR}/VERSION_NUMBER" DOWNLOADED_ONNXRUNTIME_VERSION) 21 | if(${DOWNLOADED_ONNXRUNTIME_VERSION} VERSION_EQUAL ${TRITON_BUILD_ONNXRUNTIME_VERSION}) 22 | message(NOTICE "Downloaded onnxruntime version: ${DOWNLOADED_ONNXRUNTIME_VERSION}") 23 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS ${EXTRACT_DIR}/include) 24 | set(TRITON_ONNXRUNTIME_LIB_PATHS ${EXTRACT_DIR}/lib) 25 | else() 26 | message(NOTICE "Downloaded onnxruntime version: ${DOWNLOADED_ONNXRUNTIME_VERSION} does not match the required version: ${TRITON_BUILD_ONNXRUNTIME_VERSION}") 27 | endif() 28 | 29 | endif(NOT DOWNLOAD_RESULT EQUAL 0) 30 | 31 | endif(DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 32 | 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | [tool.codespell] 28 | # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - 29 | # this is only to allow you to run codespell interactively 30 | skip = "./.git,./.github" 31 | # ignore short words, and typename parameters like OffsetT 32 | ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" 33 | # use the 'clear' dictionary for unambiguous spelling mistakes 34 | builtin = "clear" 35 | # disable warnings about binary files and wrong encoding 36 | quiet-level = 3 37 | 38 | [tool.isort] 39 | profile = "black" 40 | use_parentheses = true 41 | multi_line_output = 3 42 | include_trailing_comma = true 43 | force_grid_wrap = 0 44 | ensure_newline_before_comments = true 45 | line_length = 88 46 | balanced_wrapping = true 47 | indent = " " 48 | skip = ["build"] 49 | 50 | -------------------------------------------------------------------------------- /src/libtriton_onnxruntime.ldscript: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | { 27 | global: 28 | TRITONBACKEND_*; 29 | local: *; 30 | }; 31 | -------------------------------------------------------------------------------- /src/onnxruntime_loader.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "onnxruntime_loader.h" 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "onnxruntime_utils.h" 36 | 37 | namespace triton { namespace backend { namespace onnxruntime { 38 | 39 | std::unique_ptr OnnxLoader::loader = nullptr; 40 | 41 | OnnxLoader::~OnnxLoader() 42 | { 43 | if (env_ != nullptr) { 44 | ort_api->ReleaseEnv(env_); 45 | } 46 | } 47 | 48 | TRITONSERVER_Error* 49 | OnnxLoader::Init(common::TritonJson::Value& backend_config) 50 | { 51 | if (loader == nullptr) { 52 | OrtEnv* env; 53 | // If needed, provide custom logger with 54 | // ort_api->CreateEnvWithCustomLogger() 55 | OrtStatus* status; 56 | OrtLoggingLevel logging_level = 57 | TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE) 58 | ? ORT_LOGGING_LEVEL_VERBOSE 59 | : TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_WARN) 60 | ? ORT_LOGGING_LEVEL_WARNING 61 | : ORT_LOGGING_LEVEL_ERROR; 62 | 63 | // Controls whether to enable global threadpool which will be shared across 64 | // sessions. Use this in conjunction with DisablePerSessionThreads API or 65 | // else the session will use it's own thread pool. 66 | bool global_threadpool_enabled = false; 67 | OrtThreadingOptions* threading_options = nullptr; 68 | 69 | // Read backend config 70 | triton::common::TritonJson::Value cmdline; 71 | if (backend_config.Find("cmdline", &cmdline)) { 72 | triton::common::TritonJson::Value value; 73 | std::string value_str; 74 | if (cmdline.Find("enable-global-threadpool", &value)) { 75 | RETURN_IF_ERROR(value.AsString(&value_str)); 76 | RETURN_IF_ERROR(ParseBoolValue(value_str, &global_threadpool_enabled)); 77 | 78 | if (global_threadpool_enabled) { 79 | // If provided by user, read intra and inter op num thread 80 | // configuration and set ThreadingOptions accordingly. If not, we use 81 | // default 0 which means value equal to number of cores will be used. 
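// The cmdline keys consulted here ("enable-global-threadpool", "intra_op_thread_count",
// "inter_op_thread_count") are populated from Triton's
// --backend-config=onnxruntime,<key>=<value> command-line flags, as described in the
// README's "Thread Pools" section.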
82 | RETURN_IF_ORT_ERROR( 83 | ort_api->CreateThreadingOptions(&threading_options)); 84 | if (cmdline.Find("intra_op_thread_count", &value)) { 85 | int intra_op_num_threads = 0; 86 | RETURN_IF_ERROR(value.AsString(&value_str)); 87 | RETURN_IF_ERROR(ParseIntValue(value_str, &intra_op_num_threads)); 88 | if (intra_op_num_threads > 0) { 89 | RETURN_IF_ORT_ERROR(ort_api->SetGlobalIntraOpNumThreads( 90 | threading_options, intra_op_num_threads)); 91 | } 92 | } 93 | if (cmdline.Find("inter_op_thread_count", &value)) { 94 | int inter_op_num_threads = 0; 95 | RETURN_IF_ERROR(value.AsString(&value_str)); 96 | RETURN_IF_ERROR(ParseIntValue(value_str, &inter_op_num_threads)); 97 | if (inter_op_num_threads > 0) { 98 | RETURN_IF_ORT_ERROR(ort_api->SetGlobalInterOpNumThreads( 99 | threading_options, inter_op_num_threads)); 100 | } 101 | } 102 | } 103 | } 104 | } 105 | 106 | if (global_threadpool_enabled && threading_options != nullptr) { 107 | status = ort_api->CreateEnvWithGlobalThreadPools( 108 | logging_level, "log", threading_options, &env); 109 | ort_api->ReleaseThreadingOptions(threading_options); 110 | } else { 111 | status = ort_api->CreateEnv(logging_level, "log", &env); 112 | } 113 | 114 | loader.reset(new OnnxLoader(env, global_threadpool_enabled)); 115 | RETURN_IF_ORT_ERROR(status); 116 | } else { 117 | return TRITONSERVER_ErrorNew( 118 | TRITONSERVER_ERROR_ALREADY_EXISTS, 119 | "OnnxLoader singleton already initialized"); 120 | } 121 | 122 | return nullptr; // success 123 | } 124 | 125 | void 126 | OnnxLoader::TryRelease(bool decrement_session_cnt) 127 | { 128 | std::unique_ptr lloader; 129 | { 130 | std::lock_guard lk(loader->mu_); 131 | if (decrement_session_cnt) { 132 | loader->live_session_cnt_--; 133 | } 134 | 135 | if (loader->closing_ && (loader->live_session_cnt_ == 0)) { 136 | lloader.swap(loader); 137 | } 138 | } 139 | } 140 | 141 | TRITONSERVER_Error* 142 | OnnxLoader::Stop() 143 | { 144 | if (loader != nullptr) { 145 | loader->closing_ = true; 146 | TryRelease(false); 147 | } else { 148 | return TRITONSERVER_ErrorNew( 149 | TRITONSERVER_ERROR_UNAVAILABLE, 150 | "OnnxLoader singleton has not been initialized"); 151 | } 152 | 153 | return nullptr; // success 154 | } 155 | 156 | bool 157 | OnnxLoader::IsGlobalThreadPoolEnabled() 158 | { 159 | if (loader != nullptr) { 160 | return loader->global_threadpool_enabled_; 161 | } 162 | 163 | return false; 164 | } 165 | 166 | TRITONSERVER_Error* 167 | OnnxLoader::LoadSession( 168 | const bool is_path, const std::string& model, 169 | const OrtSessionOptions* session_options, OrtSession** session) 170 | { 171 | #ifdef _WIN32 172 | std::wstring_convert> converter; 173 | std::wstring ort_style_model_str = converter.from_bytes(model); 174 | #else 175 | const auto& ort_style_model_str = model; 176 | #endif 177 | if (loader != nullptr) { 178 | { 179 | std::lock_guard lk(loader->mu_); 180 | if (loader->closing_) { 181 | return TRITONSERVER_ErrorNew( 182 | TRITONSERVER_ERROR_UNAVAILABLE, "OnnxLoader has been stopped"); 183 | } else { 184 | loader->live_session_cnt_++; 185 | } 186 | } 187 | 188 | OrtStatus* status = nullptr; 189 | { 190 | // [FIXME] Remove lock when ORT create session is thread safe [DLIS-4663] 191 | static std::mutex ort_create_session_mu; 192 | std::lock_guard ort_lk(ort_create_session_mu); 193 | 194 | if (!is_path) { 195 | status = ort_api->CreateSessionFromArray( 196 | loader->env_, ort_style_model_str.c_str(), model.size(), 197 | session_options, session); 198 | } else { 199 | status = ort_api->CreateSession( 200 | 
loader->env_, ort_style_model_str.c_str(), session_options, 201 | session); 202 | } 203 | } 204 | 205 | if (status != nullptr) { 206 | TryRelease(true); 207 | } 208 | RETURN_IF_ORT_ERROR(status); 209 | } else { 210 | return TRITONSERVER_ErrorNew( 211 | TRITONSERVER_ERROR_UNAVAILABLE, 212 | "OnnxLoader singleton has not been initialized"); 213 | } 214 | 215 | return nullptr; // success 216 | } 217 | 218 | TRITONSERVER_Error* 219 | OnnxLoader::UnloadSession(OrtSession* session) 220 | { 221 | if (loader != nullptr) { 222 | ort_api->ReleaseSession(session); 223 | TryRelease(true); 224 | } else { 225 | return TRITONSERVER_ErrorNew( 226 | TRITONSERVER_ERROR_UNAVAILABLE, 227 | "OnnxLoader singleton has not been initialized"); 228 | } 229 | 230 | return nullptr; // success 231 | } 232 | 233 | }}} // namespace triton::backend::onnxruntime 234 | -------------------------------------------------------------------------------- /src/onnxruntime_loader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | #pragma once 27 | 28 | #include 29 | 30 | #include 31 | #include 32 | 33 | #include "triton/backend/backend_common.h" 34 | #include "triton/core/tritonbackend.h" 35 | 36 | namespace triton { namespace backend { namespace onnxruntime { 37 | 38 | /// A singleton to load Onnx model because loading models requires 39 | /// Onnx Runtime environment which is unique per process 40 | class OnnxLoader { 41 | public: 42 | ~OnnxLoader(); 43 | 44 | /// Initialize loader with default environment settings 45 | static TRITONSERVER_Error* Init(common::TritonJson::Value& backend_config); 46 | 47 | /// Stop loader, and once all Onnx sessions are unloaded via UnloadSession() 48 | /// the resource it allocated will be released 49 | static TRITONSERVER_Error* Stop(); 50 | 51 | /// Load a Onnx model from a path and return the corresponding 52 | /// OrtSession. 
53 | /// 54 | /// \param bool is_path If true 'model' is a path to the model file, 55 | /// if false 'model' is the serialized model. 56 | /// \param model The Onnx model or path to the model. 57 | /// \param session_options The options to use when creating the session 58 | /// \param session Returns the Onnx model session 59 | /// \return Error status. 60 | static TRITONSERVER_Error* LoadSession( 61 | const bool is_path, const std::string& model, 62 | const OrtSessionOptions* session_options, OrtSession** session); 63 | 64 | /// Unload a Onnx model session 65 | /// 66 | /// \param session The Onnx model session to be unloaded 67 | static TRITONSERVER_Error* UnloadSession(OrtSession* session); 68 | 69 | /// Returns whether global thread pool is enabled. 70 | /// If the loader is not initialized it returns false. 71 | static bool IsGlobalThreadPoolEnabled(); 72 | 73 | private: 74 | OnnxLoader(OrtEnv* env, bool enable_global_threadpool = false) 75 | : env_(env), global_threadpool_enabled_(enable_global_threadpool), 76 | live_session_cnt_(0), closing_(false) 77 | { 78 | } 79 | 80 | /// Decrease 'live_session_cnt_' if 'decrement_session_cnt' is true, and then 81 | /// release Onnx Runtime environment if it is closing and no live sessions 82 | /// 83 | /// \param decrement_session_cnt Whether to decrease the 'live_session_cnt_' 84 | static void TryRelease(bool decrement_session_cnt); 85 | 86 | static std::unique_ptr loader; 87 | 88 | OrtEnv* env_; 89 | bool global_threadpool_enabled_; 90 | std::mutex mu_; 91 | size_t live_session_cnt_; 92 | bool closing_; 93 | }; 94 | 95 | }}} // namespace triton::backend::onnxruntime 96 | -------------------------------------------------------------------------------- /src/onnxruntime_utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "onnxruntime_utils.h" 28 | 29 | namespace triton { namespace backend { namespace onnxruntime { 30 | 31 | const OrtApi* ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION); 32 | 33 | namespace { 34 | 35 | std::string 36 | OnnxTypeName(ONNXType onnx_type) 37 | { 38 | switch (onnx_type) { 39 | case ONNX_TYPE_TENSOR: 40 | return "ONNX_TYPE_TENSOR"; 41 | case ONNX_TYPE_SEQUENCE: 42 | return "ONNX_TYPE_SEQUENCE"; 43 | case ONNX_TYPE_MAP: 44 | return "ONNX_TYPE_MAP"; 45 | case ONNX_TYPE_OPAQUE: 46 | return "ONNX_TYPE_OPAQUE"; 47 | case ONNX_TYPE_SPARSETENSOR: 48 | return "ONNX_TYPE_SPARSETENSOR"; 49 | case ONNX_TYPE_UNKNOWN: 50 | default: 51 | break; 52 | } 53 | 54 | return "ONNX_TYPE_UNKNOWN"; 55 | } 56 | 57 | enum class NameType { 58 | INPUT, 59 | OUTPUT, 60 | INITIALIZER, 61 | }; 62 | 63 | TRITONSERVER_Error* 64 | InputOutputInitializerNames( 65 | OrtSession* session, NameType type, std::set& names) 66 | { 67 | names.clear(); 68 | 69 | size_t num_nodes; 70 | switch (type) { 71 | case NameType::INPUT: 72 | RETURN_IF_ORT_ERROR(ort_api->SessionGetInputCount(session, &num_nodes)); 73 | break; 74 | case NameType::OUTPUT: 75 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOutputCount(session, &num_nodes)); 76 | break; 77 | case NameType::INITIALIZER: 78 | RETURN_IF_ORT_ERROR( 79 | ort_api->SessionGetOverridableInitializerCount(session, &num_nodes)); 80 | break; 81 | } 82 | 83 | // iterate over all input / output nodes 84 | OrtAllocator* allocator; 85 | RETURN_IF_ORT_ERROR(ort_api->GetAllocatorWithDefaultOptions(&allocator)); 86 | OrtStatus* onnx_status = nullptr; 87 | for (size_t i = 0; i < num_nodes; i++) { 88 | char* node_name = nullptr; 89 | switch (type) { 90 | case NameType::INPUT: 91 | onnx_status = 92 | ort_api->SessionGetInputName(session, i, allocator, &node_name); 93 | break; 94 | case NameType::OUTPUT: 95 | onnx_status = 96 | ort_api->SessionGetOutputName(session, i, allocator, &node_name); 97 | break; 98 | case NameType::INITIALIZER: 99 | onnx_status = ort_api->SessionGetOverridableInitializerName( 100 | session, i, allocator, &node_name); 101 | break; 102 | } 103 | 104 | // Make a std::string copy of the name and then free 'node_name' 105 | // since the ORT API makes us responsible for doing that. 
106 | std::string name(node_name); 107 | auto free_status = ort_api->AllocatorFree(allocator, node_name); 108 | if (free_status != nullptr) { 109 | LOG_MESSAGE( 110 | TRITONSERVER_LOG_ERROR, 111 | (std::string("onnx runtime allocator free error:") + 112 | std::to_string(ort_api->GetErrorCode(free_status)) + 113 | ort_api->GetErrorMessage(free_status)) 114 | .c_str()); 115 | ort_api->ReleaseStatus(free_status); 116 | } 117 | 118 | if (onnx_status != nullptr) { 119 | break; 120 | } 121 | 122 | names.emplace(std::move(name)); 123 | } 124 | RETURN_IF_ORT_ERROR(onnx_status); 125 | 126 | return nullptr; // success 127 | } 128 | 129 | TRITONSERVER_Error* 130 | InputOutputInitializerInfos( 131 | OrtSession* session, OrtAllocator* allocator, NameType type, 132 | OnnxTensorInfoMap& infos) 133 | { 134 | infos.clear(); 135 | 136 | size_t num_nodes; 137 | switch (type) { 138 | case NameType::INPUT: 139 | RETURN_IF_ORT_ERROR(ort_api->SessionGetInputCount(session, &num_nodes)); 140 | break; 141 | case NameType::OUTPUT: 142 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOutputCount(session, &num_nodes)); 143 | break; 144 | case NameType::INITIALIZER: 145 | RETURN_IF_ORT_ERROR( 146 | ort_api->SessionGetOverridableInitializerCount(session, &num_nodes)); 147 | break; 148 | } 149 | 150 | // iterate over all nodes 151 | for (size_t i = 0; i < num_nodes; i++) { 152 | char* cname = nullptr; 153 | switch (type) { 154 | case NameType::INPUT: 155 | RETURN_IF_ORT_ERROR( 156 | ort_api->SessionGetInputName(session, i, allocator, &cname)); 157 | break; 158 | case NameType::OUTPUT: 159 | RETURN_IF_ORT_ERROR( 160 | ort_api->SessionGetOutputName(session, i, allocator, &cname)); 161 | break; 162 | case NameType::INITIALIZER: 163 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOverridableInitializerName( 164 | session, i, allocator, &cname)); 165 | break; 166 | } 167 | 168 | // Make a std::string copy of the name and then free 'cname' since 169 | // the ORT API makes us responsible for doing that. 
170 | std::string name(cname); 171 | auto free_status = ort_api->AllocatorFree(allocator, cname); 172 | if (free_status != nullptr) { 173 | LOG_MESSAGE( 174 | TRITONSERVER_LOG_ERROR, 175 | (std::string("onnx runtime allocator free error:") + 176 | std::to_string(ort_api->GetErrorCode(free_status)) + 177 | ort_api->GetErrorMessage(free_status)) 178 | .c_str()); 179 | ort_api->ReleaseStatus(free_status); 180 | } 181 | 182 | OrtTypeInfo* typeinfo; 183 | switch (type) { 184 | case NameType::INPUT: 185 | RETURN_IF_ORT_ERROR( 186 | ort_api->SessionGetInputTypeInfo(session, i, &typeinfo)); 187 | break; 188 | case NameType::OUTPUT: 189 | RETURN_IF_ORT_ERROR( 190 | ort_api->SessionGetOutputTypeInfo(session, i, &typeinfo)); 191 | break; 192 | case NameType::INITIALIZER: 193 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOverridableInitializerTypeInfo( 194 | session, i, &typeinfo)); 195 | break; 196 | } 197 | 198 | std::unique_ptr typeinfo_wrapper(typeinfo); 199 | 200 | ONNXType onnx_type; 201 | RETURN_IF_ORT_ERROR(ort_api->GetOnnxTypeFromTypeInfo(typeinfo, &onnx_type)); 202 | RETURN_ERROR_IF_TRUE( 203 | onnx_type != ONNX_TYPE_TENSOR, TRITONSERVER_ERROR_UNSUPPORTED, 204 | std::string("Unsupported ONNX Type '") + OnnxTypeName(onnx_type) + 205 | "' for I/O '" + name + "', expected '" + 206 | OnnxTypeName(ONNX_TYPE_TENSOR) + "'."); 207 | 208 | const OrtTensorTypeAndShapeInfo* tensor_info; 209 | RETURN_IF_ORT_ERROR( 210 | ort_api->CastTypeInfoToTensorInfo(typeinfo, &tensor_info)); 211 | 212 | ONNXTensorElementDataType type; 213 | RETURN_IF_ORT_ERROR(ort_api->GetTensorElementType(tensor_info, &type)); 214 | 215 | size_t num_dims; 216 | RETURN_IF_ORT_ERROR(ort_api->GetDimensionsCount(tensor_info, &num_dims)); 217 | 218 | std::vector dims(num_dims); 219 | RETURN_IF_ORT_ERROR( 220 | ort_api->GetDimensions(tensor_info, (int64_t*)dims.data(), num_dims)); 221 | 222 | infos.emplace(std::move(name), OnnxTensorInfo(type, dims)); 223 | } 224 | 225 | return nullptr; // success 226 | } 227 | 228 | } // namespace 229 | 230 | std::string 231 | OnnxDataTypeName(ONNXTensorElementDataType onnx_type) 232 | { 233 | switch (onnx_type) { 234 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: 235 | return "FLOAT"; 236 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: 237 | return "UINT8"; 238 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: 239 | return "INT8"; 240 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: 241 | return "UINT16"; 242 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: 243 | return "INT16"; 244 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: 245 | return "INT32"; 246 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: 247 | return "INT64"; 248 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: 249 | return "STRING"; 250 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: 251 | return "BOOL"; 252 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: 253 | return "FLOAT16"; 254 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: 255 | return "DOUBLE"; 256 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: 257 | return "UINT32"; 258 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: 259 | return "UINT64"; 260 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: 261 | return "COMPLEX64"; 262 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: 263 | return "COMPLEX64"; 264 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: 265 | return "BFLOAT16"; 266 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED: 267 | default: 268 | break; 269 | } 270 | 271 | return "UNDEFINED"; 272 | } 273 | 274 | TRITONSERVER_DataType 275 | ConvertFromOnnxDataType(ONNXTensorElementDataType onnx_type) 276 | { 277 | switch (onnx_type) { 278 
| case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: 279 | // maps to c type float (4 bytes) 280 | return TRITONSERVER_TYPE_FP32; 281 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: 282 | return TRITONSERVER_TYPE_UINT8; 283 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: 284 | return TRITONSERVER_TYPE_INT8; 285 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: 286 | return TRITONSERVER_TYPE_UINT16; 287 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: 288 | return TRITONSERVER_TYPE_INT16; 289 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: 290 | return TRITONSERVER_TYPE_INT32; 291 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: 292 | return TRITONSERVER_TYPE_INT64; 293 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: 294 | return TRITONSERVER_TYPE_BYTES; 295 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: 296 | return TRITONSERVER_TYPE_BOOL; 297 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: 298 | return TRITONSERVER_TYPE_FP16; 299 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: 300 | // maps to c type double (8 bytes) 301 | return TRITONSERVER_TYPE_FP64; 302 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: 303 | return TRITONSERVER_TYPE_UINT32; 304 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: 305 | return TRITONSERVER_TYPE_UINT64; 306 | // The following types are not supported: 307 | // complex with float32 real and imaginary components 308 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: 309 | // complex with float64 real and imaginary components 310 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: 311 | // Non-IEEE floating-point format based on IEEE754 single-precision 312 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: 313 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED: 314 | default: 315 | break; 316 | } 317 | 318 | return TRITONSERVER_TYPE_INVALID; 319 | } 320 | 321 | ONNXTensorElementDataType 322 | ConvertToOnnxDataType(TRITONSERVER_DataType data_type) 323 | { 324 | switch (data_type) { 325 | case TRITONSERVER_TYPE_UINT8: 326 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; 327 | case TRITONSERVER_TYPE_UINT16: 328 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; 329 | case TRITONSERVER_TYPE_UINT32: 330 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; 331 | case TRITONSERVER_TYPE_UINT64: 332 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; 333 | case TRITONSERVER_TYPE_INT8: 334 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; 335 | case TRITONSERVER_TYPE_INT16: 336 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; 337 | case TRITONSERVER_TYPE_INT32: 338 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; 339 | case TRITONSERVER_TYPE_INT64: 340 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; 341 | case TRITONSERVER_TYPE_FP16: 342 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; 343 | case TRITONSERVER_TYPE_FP32: 344 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; 345 | case TRITONSERVER_TYPE_FP64: 346 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; 347 | case TRITONSERVER_TYPE_BYTES: 348 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; 349 | case TRITONSERVER_TYPE_BOOL: 350 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; 351 | default: 352 | break; 353 | } 354 | 355 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 356 | } 357 | 358 | ONNXTensorElementDataType 359 | ConvertToOnnxDataType(const std::string& data_type_str) 360 | { 361 | TRITONSERVER_DataType data_type = 362 | TRITONSERVER_StringToDataType(data_type_str.c_str()); 363 | return ConvertToOnnxDataType(data_type); 364 | } 365 | 366 | ONNXTensorElementDataType 367 | ModelConfigDataTypeToOnnxDataType(const std::string& data_type_str) 368 | { 369 | // Must start with "TYPE_". 
370 | if (data_type_str.rfind("TYPE_", 0) != 0) { 371 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 372 | } 373 | 374 | const std::string dtype = data_type_str.substr(strlen("TYPE_")); 375 | 376 | if (dtype == "BOOL") { 377 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; 378 | } else if (dtype == "UINT8") { 379 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; 380 | } else if (dtype == "UINT16") { 381 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; 382 | } else if (dtype == "UINT32") { 383 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; 384 | } else if (dtype == "UINT64") { 385 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; 386 | } else if (dtype == "INT8") { 387 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; 388 | } else if (dtype == "INT16") { 389 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; 390 | } else if (dtype == "INT32") { 391 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; 392 | } else if (dtype == "INT64") { 393 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; 394 | } else if (dtype == "FP16") { 395 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; 396 | } else if (dtype == "FP32") { 397 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; 398 | } else if (dtype == "FP64") { 399 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; 400 | } else if (dtype == "STRING") { 401 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; 402 | } 403 | 404 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 405 | } 406 | 407 | std::string 408 | OnnxDataTypeToModelConfigDataType(ONNXTensorElementDataType data_type) 409 | { 410 | if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) { 411 | return "TYPE_BOOL"; 412 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { 413 | return "TYPE_UINT8"; 414 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) { 415 | return "TYPE_UINT16"; 416 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32) { 417 | return "TYPE_UINT32"; 418 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64) { 419 | return "TYPE_UINT64"; 420 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { 421 | return "TYPE_INT8"; 422 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16) { 423 | return "TYPE_INT16"; 424 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { 425 | return "TYPE_INT32"; 426 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { 427 | return "TYPE_INT64"; 428 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { 429 | return "TYPE_FP16"; 430 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { 431 | return "TYPE_FP32"; 432 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { 433 | return "TYPE_FP64"; 434 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) { 435 | return "TYPE_STRING"; 436 | } 437 | 438 | return "TYPE_INVALID"; 439 | } 440 | 441 | TRITONSERVER_Error* 442 | OverridableInitializerNames(OrtSession* session, std::set& names) 443 | { 444 | return InputOutputInitializerNames(session, NameType::INITIALIZER, names); 445 | } 446 | 447 | TRITONSERVER_Error* 448 | InputNames(OrtSession* session, std::set& names) 449 | { 450 | return InputOutputInitializerNames(session, NameType::INPUT, names); 451 | } 452 | 453 | TRITONSERVER_Error* 454 | OutputNames(OrtSession* session, std::set& names) 455 | { 456 | return InputOutputInitializerNames(session, NameType::OUTPUT, names); 457 | } 458 | 459 | TRITONSERVER_Error* 460 | InputInfos( 461 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 462 | { 463 | return InputOutputInitializerInfos( 464 | 
session, allocator, NameType::INPUT, infos); 465 | } 466 | 467 | TRITONSERVER_Error* 468 | OutputInfos( 469 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 470 | { 471 | return InputOutputInitializerInfos( 472 | session, allocator, NameType::OUTPUT, infos); 473 | } 474 | 475 | TRITONSERVER_Error* 476 | OverridableInitializerInfos( 477 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 478 | { 479 | return InputOutputInitializerInfos( 480 | session, allocator, NameType::INITIALIZER, infos); 481 | } 482 | 483 | TRITONSERVER_Error* 484 | CompareDimsSupported( 485 | const std::string& model_name, const std::string& tensor_name, 486 | const std::vector& model_shape, const std::vector& dims, 487 | const int max_batch_size, const bool compare_exact) 488 | { 489 | // If the model configuration expects batching support in the model, 490 | // then the onnx shape first dimension must be -1. 491 | const bool supports_batching = (max_batch_size > 0); 492 | if (supports_batching) { 493 | RETURN_ERROR_IF_TRUE( 494 | (model_shape.size() == 0) || (model_shape[0] != -1), 495 | TRITONSERVER_ERROR_INVALID_ARG, 496 | std::string("model '") + model_name + "', tensor '" + tensor_name + 497 | "': for the model to support batching the shape should have at " 498 | "least 1 dimension and the first dimension must be -1; but shape " 499 | "expected by the model is " + 500 | ShapeToString(model_shape)); 501 | 502 | std::vector full_dims; 503 | full_dims.reserve(1 + dims.size()); 504 | full_dims.push_back(-1); 505 | full_dims.insert(full_dims.end(), dims.begin(), dims.end()); 506 | 507 | bool succ = (model_shape.size() == (size_t)full_dims.size()); 508 | if (succ) { 509 | for (size_t i = 0; i < full_dims.size(); ++i) { 510 | const int64_t model_dim = model_shape[i]; 511 | if (compare_exact || (model_dim != -1)) { 512 | succ &= (model_dim == full_dims[i]); 513 | } 514 | } 515 | } 516 | 517 | RETURN_ERROR_IF_TRUE( 518 | !succ, TRITONSERVER_ERROR_INVALID_ARG, 519 | std::string("model '") + model_name + "', tensor '" + tensor_name + 520 | "': the model expects " + std::to_string(model_shape.size()) + 521 | " dimensions (shape " + ShapeToString(model_shape) + 522 | ") but the model configuration specifies " + 523 | std::to_string(full_dims.size()) + 524 | " dimensions (an initial batch dimension because max_batch_size " 525 | "> 0 followed by the explicit tensor shape, making complete " 526 | "shape " + 527 | ShapeToString(full_dims) + ")"); 528 | } else { 529 | // ! 
supports_batching 530 | bool succ = (model_shape.size() == dims.size()); 531 | if (succ) { 532 | for (size_t i = 0; i < dims.size(); ++i) { 533 | const int64_t model_dim = model_shape[i]; 534 | if (compare_exact || (model_dim != -1)) { 535 | succ &= (model_dim == dims[i]); 536 | } 537 | } 538 | } 539 | 540 | RETURN_ERROR_IF_TRUE( 541 | !succ, TRITONSERVER_ERROR_INVALID_ARG, 542 | std::string("model '") + model_name + "', tensor '" + tensor_name + 543 | "': the model expects " + std::to_string(model_shape.size()) + 544 | " dimensions (shape " + ShapeToString(model_shape) + 545 | ") but the model configuration specifies " + 546 | std::to_string(dims.size()) + " dimensions (shape " + 547 | ShapeToString(dims) + ")"); 548 | } 549 | 550 | return nullptr; // success 551 | } 552 | 553 | 554 | }}} // namespace triton::backend::onnxruntime 555 | -------------------------------------------------------------------------------- /src/onnxruntime_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "triton/backend/backend_common.h" 37 | #include "triton/core/tritonserver.h" 38 | 39 | namespace triton { namespace backend { namespace onnxruntime { 40 | 41 | extern const OrtApi* ort_api; 42 | 43 | #define RESPOND_ALL_AND_SET_TRUE_IF_ORT_ERROR( \ 44 | RESPONSES, RESPONSES_COUNT, BOOL, S) \ 45 | do { \ 46 | OrtStatus* status__ = (S); \ 47 | if (status__ != nullptr) { \ 48 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 49 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 50 | ort_api->ReleaseStatus(status__); \ 51 | auto err__ = TRITONSERVER_ErrorNew( \ 52 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 53 | std::to_string(code) + ": " + msg) \ 54 | .c_str()); \ 55 | RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \ 56 | RESPONSES, RESPONSES_COUNT, BOOL, err__); \ 57 | } \ 58 | } while (false) 59 | 60 | #define RETURN_IF_ORT_ERROR(S) \ 61 | do { \ 62 | OrtStatus* status__ = (S); \ 63 | if (status__ != nullptr) { \ 64 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 65 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 66 | ort_api->ReleaseStatus(status__); \ 67 | return TRITONSERVER_ErrorNew( \ 68 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 69 | std::to_string(code) + ": " + msg) \ 70 | .c_str()); \ 71 | } \ 72 | } while (false) 73 | 74 | #define THROW_IF_BACKEND_MODEL_ORT_ERROR(S) \ 75 | do { \ 76 | OrtStatus* status__ = (S); \ 77 | if (status__ != nullptr) { \ 78 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 79 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 80 | ort_api->ReleaseStatus(status__); \ 81 | throw BackendModelException(TRITONSERVER_ErrorNew( \ 82 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 83 | std::to_string(code) + ": " + msg) \ 84 | .c_str())); \ 85 | } \ 86 | } while (false) 87 | 88 | #define THROW_IF_BACKEND_INSTANCE_ORT_ERROR(S) \ 89 | do { \ 90 | OrtStatus* status__ = (S); \ 91 | if (status__ != nullptr) { \ 92 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 93 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 94 | ort_api->ReleaseStatus(status__); \ 95 | throw BackendModelInstanceException(TRITONSERVER_ErrorNew( \ 96 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 97 | std::to_string(code) + ": " + msg) \ 98 | .c_str())); \ 99 | } \ 100 | } while (false) 101 | 102 | struct OnnxTensorInfo { 103 | OnnxTensorInfo(ONNXTensorElementDataType type, std::vector dims) 104 | : type_(type), dims_(dims) 105 | { 106 | } 107 | 108 | OnnxTensorInfo() {} 109 | 110 | ONNXTensorElementDataType type_{ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED}; 111 | std::vector dims_; 112 | }; 113 | 114 | using OnnxTensorInfoMap = std::unordered_map; 115 | 116 | /// Deleter for OrtTypeInfo. 117 | struct TypeInfoDeleter { 118 | void operator()(OrtTypeInfo* f) { ort_api->ReleaseTypeInfo(f); } 119 | }; 120 | 121 | /// Deleter for OrtSessionOptions. 
122 | struct SessionOptionsDeleter { 123 | void operator()(OrtSessionOptions* f) { ort_api->ReleaseSessionOptions(f); } 124 | }; 125 | 126 | std::string OnnxDataTypeName(ONNXTensorElementDataType onnx_type); 127 | 128 | TRITONSERVER_DataType ConvertFromOnnxDataType( 129 | ONNXTensorElementDataType onnx_type); 130 | 131 | ONNXTensorElementDataType ConvertToOnnxDataType( 132 | TRITONSERVER_DataType data_type); 133 | ONNXTensorElementDataType ConvertToOnnxDataType( 134 | const std::string& data_type_str); 135 | 136 | ONNXTensorElementDataType ModelConfigDataTypeToOnnxDataType( 137 | const std::string& data_type_str); 138 | std::string OnnxDataTypeToModelConfigDataType( 139 | ONNXTensorElementDataType data_type); 140 | 141 | TRITONSERVER_Error* OverridableInitializerNames( 142 | OrtSession* session, std::set& names); 143 | TRITONSERVER_Error* InputNames( 144 | OrtSession* session, std::set& names); 145 | TRITONSERVER_Error* OutputNames( 146 | OrtSession* session, std::set& names); 147 | 148 | TRITONSERVER_Error* OverridableInitializerInfos( 149 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 150 | TRITONSERVER_Error* InputInfos( 151 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 152 | TRITONSERVER_Error* OutputInfos( 153 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 154 | 155 | TRITONSERVER_Error* CompareDimsSupported( 156 | const std::string& model_name, const std::string& tensor_name, 157 | const std::vector& model_shape, const std::vector& dims, 158 | const int max_batch_size, const bool compare_exact); 159 | 160 | }}} // namespace triton::backend::onnxruntime 161 | -------------------------------------------------------------------------------- /test/initializer_as_input/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | This test is originated in "onnxruntime_backend" repository to better 30 | represent the scope of the test, however, this test utilizes Triton utilities 31 | and assumes that the test is located under "qa" directory in "server" repository 32 | for accessing those utilities. Please make sure the test environment is properly 33 | set before running the test. -------------------------------------------------------------------------------- /test/initializer_as_input/generate_test_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import numpy as np 28 | import onnx 29 | 30 | # Reference script on how the model used in this test is created 31 | if __name__ == "__main__": 32 | values = np.ones((5, 5)).astype(np.float32) 33 | onnx_dtype = onnx.TensorProto.FLOAT 34 | initialized_input = onnx.helper.make_tensor( 35 | name="INITIALIZER", 36 | data_type=onnx_dtype, 37 | dims=values.shape, 38 | vals=values.flatten().astype(float), 39 | ) 40 | add = onnx.helper.make_node("Add", ["INPUT", "INITIALIZER"], ["OUTPUT"]) 41 | 42 | input = onnx.helper.make_tensor_value_info("INPUT", onnx_dtype, values.shape) 43 | initializer = onnx.helper.make_tensor_value_info( 44 | "INITIALIZER", onnx_dtype, values.shape 45 | ) 46 | output = onnx.helper.make_tensor_value_info("OUTPUT", onnx_dtype, values.shape) 47 | 48 | graph_proto = onnx.helper.make_graph( 49 | [add], 50 | "init_input", 51 | [input, initializer], 52 | [output], 53 | initializer=[initialized_input], 54 | ) 55 | model_def = onnx.helper.make_model(graph_proto, producer_name="triton") 56 | onnx.save(model_def, "model.onnx") 57 | -------------------------------------------------------------------------------- /test/initializer_as_input/models/add_with_initializer/1/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/onnxruntime_backend/cf4cd89bf66ed1d7fb89c2b0930d43bcce3c5f97/test/initializer_as_input/models/add_with_initializer/1/model.onnx -------------------------------------------------------------------------------- /test/initializer_as_input/models/add_with_initializer/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | platform: "onnxruntime_onnx" 28 | max_batch_size: 0 29 | input [ 30 | { 31 | name: "INPUT" 32 | data_type: TYPE_FP32 33 | dims: [5, 5] 34 | }, 35 | { 36 | name: "INITIALIZER" 37 | data_type: TYPE_FP32 38 | dims: [5, 5] 39 | optional: true 40 | } 41 | ] 42 | output [ 43 | { 44 | name: "OUTPUT" 45 | data_type: TYPE_FP32 46 | dims: [ 5, 5] 47 | } 48 | ] -------------------------------------------------------------------------------- /test/initializer_as_input/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | import unittest 29 | 30 | import numpy as np 31 | import tritonclient.http as httpclient 32 | 33 | 34 | class OptionalInputTest(unittest.TestCase): 35 | def setUp(self): 36 | self.client_ = httpclient.InferenceServerClient("localhost:8000") 37 | self.model_name_ = "add_with_initializer" 38 | self.input_data_ = np.zeros((5, 5)).astype(np.float32) 39 | self.input_ = httpclient.InferInput("INPUT", self.input_data_.shape, "FP32") 40 | self.input_.set_data_from_numpy(self.input_data_, binary_data=False) 41 | self.optional_input_ = httpclient.InferInput( 42 | "INITIALIZER", self.input_data_.shape, "FP32" 43 | ) 44 | self.optional_input_.set_data_from_numpy(self.input_data_, binary_data=False) 45 | 46 | def test_without_optional(self): 47 | # Send request without providing optional input, the ONNX model 48 | # should use stored initializer value (tensor of all 1s) 49 | results = self.client_.infer(self.model_name_, [self.input_]) 50 | np.testing.assert_allclose(results.as_numpy("OUTPUT"), (self.input_data_ + 1)) 51 | 52 | def test_with_optional(self): 53 | # Send request with optional input provided, the ONNX model 54 | # should use provided value for the initializer 55 | results = self.client_.infer( 56 | self.model_name_, [self.input_, self.optional_input_] 57 | ) 58 | np.testing.assert_allclose( 59 | results.as_numpy("OUTPUT"), (self.input_data_ + self.input_data_) 60 | ) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /test/initializer_as_input/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
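# The run_server helper and the SERVER_PID variable used below are assumed to be provided
# by the Triton server repository's qa/common/util.sh (sourced here as ../common/util.sh),
# per the placement requirement described in this test's README.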
27 | 28 | export CUDA_VISIBLE_DEVICES=0 29 | 30 | SERVER=/opt/tritonserver/bin/tritonserver 31 | SERVER_ARGS="--model-repository=`pwd`/models" 32 | SERVER_LOG="./server.log" 33 | CLIENT_LOG="./test.log" 34 | source ../common/util.sh 35 | 36 | rm -f *.log 37 | 38 | run_server 39 | if [ "$SERVER_PID" == "0" ]; then 40 | echo -e "\n***\n*** Failed to start $SERVER\n***" 41 | cat $SERVER_LOG 42 | exit 1 43 | fi 44 | 45 | RET=0 46 | 47 | set +e 48 | 49 | python test.py >>$CLIENT_LOG 2>&1 50 | if [ $? -ne 0 ]; then 51 | cat $CLIENT_LOG 52 | echo -e "\n***\n*** Test Failed\n***" 53 | RET=1 54 | fi 55 | 56 | set -e 57 | 58 | kill $SERVER_PID 59 | wait $SERVER_PID 60 | 61 | if [ $RET -eq 0 ]; then 62 | echo -e "\n***\n*** Test Passed\n***" 63 | else 64 | echo -e "\n***\n*** Test FAILED\n***" 65 | fi 66 | 67 | exit $RET 68 | -------------------------------------------------------------------------------- /tools/gen_ort_dockerfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | import argparse 29 | import os 30 | import platform 31 | import re 32 | 33 | FLAGS = None 34 | 35 | ORT_TO_TRTPARSER_VERSION_MAP = { 36 | "1.9.0": ( 37 | "8.2", # TensorRT version 38 | "release/8.2-GA", # ONNX-Tensorrt parser version 39 | ), 40 | "1.10.0": ( 41 | "8.2", # TensorRT version 42 | "release/8.2-GA", # ONNX-Tensorrt parser version 43 | ), 44 | } 45 | 46 | OPENVINO_VERSION_MAP = { 47 | "2024.0.0": ( 48 | "2024.0", # OpenVINO short version 49 | "2024.0.0.14509.34caeefd078", # OpenVINO version with build number 50 | ), 51 | "2024.1.0": ( 52 | "2024.1", # OpenVINO short version 53 | "2024.1.0.15008.f4afc983258", # OpenVINO version with build number 54 | ), 55 | "2024.4.0": ( 56 | "2024.4", # OpenVINO short version 57 | "2024.4.0.16579.c3152d32c9c", # OpenVINO version with build number 58 | ), 59 | "2024.5.0": ( 60 | "2024.5", # OpenVINO short version 61 | "2024.5.0.17288.7975fa5da0c", # OpenVINO version with build number 62 | ), 63 | "2025.0.0": ( 64 | "2025.0", # OpenVINO short version 65 | "2025.0.0.17942.1f68be9f594", # OpenVINO version with build number 66 | ), 67 | "2025.1.0": ( 68 | "2025.1", # OpenVINO short version 69 | "2025.1.0.18503.6fec06580ab", # OpenVINO version with build number 70 | ), 71 | } 72 | 73 | 74 | def target_platform(): 75 | if FLAGS.target_platform is not None: 76 | return FLAGS.target_platform 77 | return platform.system().lower() 78 | 79 | 80 | def dockerfile_common(): 81 | df = """ 82 | ARG BASE_IMAGE={} 83 | ARG ONNXRUNTIME_VERSION={} 84 | ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime 85 | ARG ONNXRUNTIME_BUILD_CONFIG={} 86 | """.format( 87 | FLAGS.triton_container, FLAGS.ort_version, FLAGS.ort_build_config 88 | ) 89 | 90 | if FLAGS.ort_openvino is not None: 91 | df += """ 92 | ARG ONNXRUNTIME_OPENVINO_VERSION={} 93 | """.format( 94 | FLAGS.ort_openvino 95 | ) 96 | 97 | df += """ 98 | FROM ${BASE_IMAGE} 99 | WORKDIR /workspace 100 | """ 101 | return df 102 | 103 | 104 | def dockerfile_for_linux(output_file): 105 | df = dockerfile_common() 106 | df += """ 107 | # Ensure apt-get won't prompt for selecting options 108 | ENV DEBIAN_FRONTEND=noninteractive 109 | ENV PIP_BREAK_SYSTEM_PACKAGES=1 110 | 111 | # The Onnx Runtime dockerfile is the collection of steps in 112 | # https://github.com/microsoft/onnxruntime/tree/master/dockerfiles 113 | 114 | """ 115 | # Consider moving rhel logic to its own function e.g., dockerfile_for_rhel 116 | # if the changes become more substantial. 117 | if target_platform() == "rhel": 118 | df += """ 119 | # The manylinux container defaults to Python 3.7, but some feature installation 120 | # requires a higher version. 
121 | ARG PYVER=3.12 122 | ENV PYTHONPATH=/opt/python/v 123 | RUN ln -sf /opt/python/cp${PYVER/./}* ${PYTHONPATH} 124 | 125 | ENV PYBIN=${PYTHONPATH}/bin 126 | ENV PYTHON_BIN_PATH=${PYBIN}/python${PYVER} \ 127 | PATH=${PYBIN}:${PATH} 128 | 129 | RUN yum install -y \ 130 | wget \ 131 | zip \ 132 | ca-certificates \ 133 | curl \ 134 | python3-pip \ 135 | git \ 136 | gnupg \ 137 | gnupg1 \ 138 | openssl-devel 139 | 140 | RUN pip3 install patchelf==0.17.2 141 | """ 142 | else: 143 | if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"): 144 | df += """ 145 | ENV CCACHE_REMOTE_ONLY="true" \\ 146 | CCACHE_REMOTE_STORAGE="{}" \\ 147 | CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\ 148 | CMAKE_C_COMPILER_LAUNCHER="ccache" \\ 149 | CMAKE_CUDA_COMPILER_LAUNCHER="ccache" \\ 150 | VERBOSE=1 151 | 152 | RUN apt-get update \\ 153 | && apt-get install -y --no-install-recommends ccache && ccache -p \\ 154 | && rm -rf /var/lib/apt/lists/* 155 | """.format( 156 | os.getenv("CCACHE_REMOTE_STORAGE") 157 | ) 158 | 159 | df += """ 160 | 161 | RUN apt-get update && apt-get install -y --no-install-recommends \ 162 | software-properties-common \ 163 | wget \ 164 | zip \ 165 | ca-certificates \ 166 | build-essential \ 167 | curl \ 168 | libcurl4-openssl-dev \ 169 | libssl-dev \ 170 | python3-dev \ 171 | python3-pip \ 172 | git \ 173 | gnupg \ 174 | gnupg1 175 | 176 | RUN pip3 install patchelf==0.17.2 177 | 178 | # Install dependencies from 179 | # onnxruntime/dockerfiles/scripts/install_common_deps.sh. 180 | RUN apt update -q=2 \\ 181 | && apt install -y gpg wget \\ 182 | && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \\ 183 | && . /etc/os-release \\ 184 | && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \\ 185 | && apt-get update -q=2 \\ 186 | && apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3* \\ 187 | && cmake --version 188 | 189 | """ 190 | 191 | if FLAGS.ort_openvino is not None: 192 | df += """ 193 | # Install OpenVINO 194 | ARG ONNXRUNTIME_OPENVINO_VERSION 195 | ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} 196 | """ 197 | df += """ 198 | ARG OPENVINO_SHORT_VERSION={} 199 | ARG OPENVINO_VERSION_WITH_BUILD_NUMBER={} 200 | """.format( 201 | OPENVINO_VERSION_MAP[FLAGS.ort_openvino][0], 202 | OPENVINO_VERSION_MAP[FLAGS.ort_openvino][1], 203 | ) 204 | 205 | # Openvino changed the filename of the toolkit in 2025.0.0 so we need to detect this for 206 | # the release we want to install 207 | openvino_folder_name = "UNKNOWN_FOLDER_NAME" 208 | openvino_toolkit_filename = "UNKNOWN_FILENAME" 209 | if OPENVINO_VERSION_MAP[FLAGS.ort_openvino][0].split(".")[0] >= "2025": 210 | openvino_folder_name = ( 211 | "openvino_toolkit_ubuntu24_${OPENVINO_VERSION_WITH_BUILD_NUMBER}_x86_64" 212 | ) 213 | openvino_toolkit_filename = openvino_folder_name + ".tgz" 214 | else: 215 | openvino_folder_name = "l_openvino_toolkit_ubuntu24_${OPENVINO_VERSION_WITH_BUILD_NUMBER}_x86_64" 216 | openvino_toolkit_filename = openvino_folder_name + ".tgz" 217 | 218 | df += """ 219 | # Step 1: Download and install core components 220 | # Ref: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html#step-1-download-and-install-the-openvino-core-components 221 | RUN curl -L 
https://storage.openvinotoolkit.org/repositories/openvino/packages/${{OPENVINO_SHORT_VERSION}}/linux/{} --output openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 222 | tar -xf openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 223 | mkdir -p ${{INTEL_OPENVINO_DIR}} && \ 224 | mv {}/* ${{INTEL_OPENVINO_DIR}} && \ 225 | rm openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 226 | (cd ${{INTEL_OPENVINO_DIR}}/install_dependencies && \ 227 | ./install_openvino_dependencies.sh -y) && \ 228 | ln -s ${{INTEL_OPENVINO_DIR}} ${{INTEL_OPENVINO_DIR}}/../openvino_`echo ${{ONNXRUNTIME_OPENVINO_VERSION}} | awk '{{print substr($0,0,4)}}'` 229 | 230 | # Step 2: Configure the environment 231 | # Ref: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment 232 | ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake 233 | ENV LD_LIBRARY_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH 234 | ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig 235 | ENV PYTHONPATH=$INTEL_OPENVINO_DIR/python/python3.12:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH 236 | """.format( 237 | openvino_toolkit_filename, openvino_folder_name 238 | ) 239 | 240 | ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. 241 | # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will 242 | # use tensorrt-8.0 branch instead of using rel-1.8.1 243 | # From ORT 1.9 onwards we will switch back to using rel-* branches 244 | if FLAGS.ort_version == "1.8.1": 245 | df += """ 246 | # 247 | # ONNX Runtime build 248 | # 249 | ARG ONNXRUNTIME_VERSION 250 | ARG ONNXRUNTIME_REPO 251 | ARG ONNXRUNTIME_BUILD_CONFIG 252 | 253 | RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 254 | (cd onnxruntime && git submodule update --init --recursive) 255 | """ 256 | # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser 257 | elif FLAGS.ort_version == "1.12.1": 258 | df += """ 259 | # 260 | # ONNX Runtime build 261 | # 262 | ARG ONNXRUNTIME_VERSION 263 | ARG ONNXRUNTIME_REPO 264 | ARG ONNXRUNTIME_BUILD_CONFIG 265 | 266 | RUN git clone -b tensorrt-8.5ea --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 267 | (cd onnxruntime && git submodule update --init --recursive) 268 | """ 269 | else: 270 | df += """ 271 | # 272 | # ONNX Runtime build 273 | # 274 | ARG ONNXRUNTIME_VERSION 275 | ARG ONNXRUNTIME_REPO 276 | ARG ONNXRUNTIME_BUILD_CONFIG 277 | 278 | RUN git clone -b rel-${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 279 | (cd onnxruntime && git submodule update --init --recursive) 280 | """ 281 | 282 | if FLAGS.onnx_tensorrt_tag != "": 283 | df += """ 284 | RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) 285 | """.format( 286 | FLAGS.onnx_tensorrt_tag 287 | ) 288 | 289 | ep_flags = "" 290 | if FLAGS.enable_gpu: 291 | ep_flags = "--use_cuda" 292 | if FLAGS.cuda_version is not None: 293 | ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) 294 | if FLAGS.cuda_home is not None: 295 | ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) 296 | if FLAGS.cudnn_home is not None: 297 | ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) 298 | elif target_platform() == "igpu": 299 | ep_flags += ' --cudnn_home "/usr/include"' 300 | if FLAGS.ort_tensorrt: 301 | ep_flags += " --use_tensorrt" 302 | if 
FLAGS.ort_version >= "1.12.1": 303 | ep_flags += " --use_tensorrt_builtin_parser" 304 | if FLAGS.tensorrt_home is not None: 305 | ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) 306 | 307 | if os.name == "posix": 308 | if os.getuid() == 0: 309 | ep_flags += " --allow_running_as_root" 310 | 311 | if FLAGS.ort_openvino is not None: 312 | ep_flags += " --use_openvino CPU" 313 | 314 | if target_platform() == "igpu": 315 | ep_flags += ( 316 | " --skip_tests --cmake_extra_defines 'onnxruntime_BUILD_UNIT_TESTS=OFF'" 317 | ) 318 | cuda_archs = "87;101" 319 | else: 320 | cuda_archs = "75;80;86;89;90;100;120" 321 | 322 | df += """ 323 | WORKDIR /workspace/onnxruntime 324 | ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ 325 | --compile_no_warning_as_error --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' --cmake_extra_defines CMAKE_POLICY_VERSION_MINIMUM=3.5 " 326 | """.format( 327 | cuda_archs 328 | ) 329 | 330 | df += """ 331 | RUN ./build.sh ${{COMMON_BUILD_ARGS}} --update --build {} 332 | """.format( 333 | ep_flags 334 | ) 335 | 336 | df += """ 337 | # 338 | # Copy all artifacts needed by the backend to /opt/onnxruntime 339 | # 340 | WORKDIR /opt/onnxruntime 341 | 342 | RUN mkdir -p /opt/onnxruntime && \ 343 | cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ 344 | cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt 345 | 346 | # ONNX Runtime headers, libraries and binaries 347 | RUN mkdir -p /opt/onnxruntime/include && \ 348 | cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ 349 | /opt/onnxruntime/include && \ 350 | cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ 351 | /opt/onnxruntime/include && \ 352 | cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ 353 | /opt/onnxruntime/include 354 | 355 | RUN mkdir -p /opt/onnxruntime/lib && \ 356 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ 357 | /opt/onnxruntime/lib && \ 358 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ 359 | /opt/onnxruntime/lib 360 | """ 361 | if target_platform() == "igpu": 362 | df += """ 363 | RUN mkdir -p /opt/onnxruntime/bin 364 | """ 365 | else: 366 | df += """ 367 | RUN mkdir -p /opt/onnxruntime/bin && \ 368 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnxruntime_perf_test \ 369 | /opt/onnxruntime/bin && \ 370 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnx_test_runner \ 371 | /opt/onnxruntime/bin && \ 372 | (cd /opt/onnxruntime/bin && chmod a+x *) 373 | """ 374 | 375 | if FLAGS.enable_gpu: 376 | df += """ 377 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_cuda.so \ 378 | /opt/onnxruntime/lib 379 | """ 380 | 381 | if FLAGS.ort_tensorrt: 382 | df += """ 383 | # TensorRT specific headers and libraries 384 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_tensorrt.so \ 385 | /opt/onnxruntime/lib 386 | """ 387 | 388 | if FLAGS.ort_openvino is not None: 389 | df += """ 390 | # OpenVino specific headers and libraries 391 | RUN cp -r ${INTEL_OPENVINO_DIR}/docs/licensing /opt/onnxruntime/LICENSE.openvino 392 | 393 | RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h \ 394 | /opt/onnxruntime/include 395 | 396 | RUN apt-get update && apt-get install -y 
--no-install-recommends libtbb12 397 | 398 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_openvino.so \ 399 | /opt/onnxruntime/lib && \ 400 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 401 | /opt/onnxruntime/lib && \ 402 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 403 | /opt/onnxruntime/lib && \ 404 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_intel_cpu_plugin.so \ 405 | /opt/onnxruntime/lib && \ 406 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 407 | /opt/onnxruntime/lib && \ 408 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 409 | /opt/onnxruntime/lib && \ 410 | cp /usr/lib/x86_64-linux-gnu/libtbb.so.* /opt/onnxruntime/lib 411 | 412 | RUN OV_SHORT_VERSION=`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{ split($0,a,"."); print substr(a[1],3) a[2] a[3] }'` && \ 413 | (cd /opt/onnxruntime/lib && \ 414 | chmod a-x * && \ 415 | ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so.${OV_SHORT_VERSION} && \ 416 | ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so && \ 417 | ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so.${OV_SHORT_VERSION} && \ 418 | ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so && \ 419 | ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so.${OV_SHORT_VERSION} && \ 420 | ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so && \ 421 | ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so.${OV_SHORT_VERSION} && \ 422 | ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so) 423 | """ 424 | # Linking compiled ONNX Runtime libraries to their corresponding versioned libraries 425 | df += """ 426 | RUN cd /opt/onnxruntime/lib \ 427 | && ln -s libonnxruntime.so libonnxruntime.so.1 \ 428 | && ln -s libonnxruntime.so.1 libonnxruntime.so.${ONNXRUNTIME_VERSION} 429 | """ 430 | df += """ 431 | RUN cd /opt/onnxruntime/lib && \ 432 | for i in `find . -mindepth 1 -maxdepth 1 -type f -name '*\\.so*'`; do \ 433 | patchelf --set-rpath '$ORIGIN' $i; \ 434 | done 435 | 436 | # For testing copy ONNX custom op library and model 437 | """ 438 | if target_platform() == "igpu": 439 | df += """ 440 | RUN mkdir -p /opt/onnxruntime/test 441 | """ 442 | else: 443 | df += """ 444 | RUN mkdir -p /opt/onnxruntime/test && \ 445 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libcustom_op_library.so \ 446 | /opt/onnxruntime/test && \ 447 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/testdata/custom_op_library/custom_op_test.onnx \ 448 | /opt/onnxruntime/test 449 | """ 450 | 451 | with open(output_file, "w") as dfile: 452 | dfile.write(df) 453 | 454 | 455 | def dockerfile_for_windows(output_file): 456 | df = dockerfile_common() 457 | 458 | ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. 459 | # For ORT versions 1.8.0 and below the behavior will remain same. 
For ORT version 1.8.1 we will 460 | # use tensorrt-8.0 branch instead of using rel-1.8.1 461 | # From ORT 1.9 onwards we will switch back to using rel-* branches 462 | if FLAGS.ort_version == "1.8.1": 463 | df += """ 464 | SHELL ["cmd", "/S", "/C"] 465 | 466 | # 467 | # ONNX Runtime build 468 | # 469 | ARG ONNXRUNTIME_VERSION 470 | ARG ONNXRUNTIME_REPO 471 | 472 | RUN git clone -b tensorrt-8.0 --recursive %ONNXRUNTIME_REPO% onnxruntime && \ 473 | (cd onnxruntime && git submodule update --init --recursive) 474 | """ 475 | else: 476 | df += """ 477 | SHELL ["cmd", "/S", "/C"] 478 | 479 | # 480 | # ONNX Runtime build 481 | # 482 | ARG ONNXRUNTIME_VERSION 483 | ARG ONNXRUNTIME_REPO 484 | RUN git clone -b rel-%ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ 485 | cd onnxruntime && git submodule update --init --recursive 486 | """ 487 | 488 | if FLAGS.onnx_tensorrt_tag != "": 489 | df += """ 490 | RUN (cd \\workspace\\onnxruntime\\cmake\\external\\onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) 491 | """.format( 492 | FLAGS.onnx_tensorrt_tag 493 | ) 494 | 495 | ep_flags = "" 496 | if FLAGS.enable_gpu: 497 | ep_flags = "--use_cuda" 498 | if FLAGS.cuda_version is not None: 499 | ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) 500 | if FLAGS.cuda_home is not None: 501 | ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) 502 | if FLAGS.cudnn_home is not None: 503 | ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) 504 | if FLAGS.ort_tensorrt: 505 | ep_flags += " --use_tensorrt" 506 | if FLAGS.tensorrt_home is not None: 507 | ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) 508 | if FLAGS.ort_openvino is not None: 509 | ep_flags += " --use_openvino CPU" 510 | 511 | df += """ 512 | WORKDIR /workspace/onnxruntime 513 | ARG VS_DEVCMD_BAT="\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat" 514 | RUN powershell Set-Content 'build.bat' -value 'call %VS_DEVCMD_BAT%',(Get-Content 'build.bat') 515 | RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;86;90;100;120" --skip_submodule_sync --parallel --build_shared_lib --compile_no_warning_as_error --skip_tests --update --build --build_dir /workspace/build {} 516 | """.format( 517 | ep_flags 518 | ) 519 | 520 | df += """ 521 | # 522 | # Copy all artifacts needed by the backend to /opt/onnxruntime 523 | # 524 | WORKDIR /opt/onnxruntime 525 | RUN copy \\workspace\\onnxruntime\\LICENSE \\opt\\onnxruntime 526 | RUN copy \\workspace\\onnxruntime\\cmake\\external\\onnx\\VERSION_NUMBER \\opt\\onnxruntime\\ort_onnx_version.txt 527 | 528 | # ONNX Runtime headers, libraries and binaries 529 | WORKDIR /opt/onnxruntime/include 530 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_c_api.h \\opt\\onnxruntime\\include 531 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_session_options_config_keys.h \\opt\\onnxruntime\\include 532 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\cpu\\cpu_provider_factory.h \\opt\\onnxruntime\\include 533 | 534 | WORKDIR /opt/onnxruntime/bin 535 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.dll \\opt\\onnxruntime\\bin 536 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.dll \\opt\\onnxruntime\\bin 537 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_perf_test.exe \\opt\\onnxruntime\\bin 538 | RUN copy 
\\workspace\\build\\Release\\Release\\onnx_test_runner.exe \\opt\\onnxruntime\\bin 539 | 540 | WORKDIR /opt/onnxruntime/lib 541 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.lib \\opt\\onnxruntime\\lib 542 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.lib \\opt\\onnxruntime\\lib 543 | """ 544 | 545 | if FLAGS.enable_gpu: 546 | df += """ 547 | WORKDIR /opt/onnxruntime/lib 548 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.lib \\opt\\onnxruntime\\lib 549 | WORKDIR /opt/onnxruntime/bin 550 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.dll \\opt\\onnxruntime\\bin 551 | """ 552 | 553 | if FLAGS.ort_tensorrt: 554 | df += """ 555 | # TensorRT specific headers and libraries 556 | WORKDIR /opt/onnxruntime/lib 557 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.dll \\opt\\onnxruntime\\bin 558 | 559 | WORKDIR /opt/onnxruntime/lib 560 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.lib \\opt\\onnxruntime\\lib 561 | """ 562 | with open(output_file, "w") as dfile: 563 | dfile.write(df) 564 | 565 | 566 | def preprocess_gpu_flags(): 567 | if target_platform() == "windows": 568 | # Default to CUDA based on CUDA_PATH envvar and TensorRT in 569 | # C:/tensorrt 570 | if "CUDA_PATH" in os.environ: 571 | if FLAGS.cuda_home is None: 572 | FLAGS.cuda_home = os.environ["CUDA_PATH"] 573 | elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: 574 | print("warning: --cuda-home does not match CUDA_PATH envvar") 575 | 576 | if FLAGS.cudnn_home is None: 577 | FLAGS.cudnn_home = FLAGS.cuda_home 578 | 579 | version = None 580 | m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) 581 | if m: 582 | version = m.group(1) 583 | 584 | if FLAGS.cuda_version is None: 585 | FLAGS.cuda_version = version 586 | elif FLAGS.cuda_version != version: 587 | print("warning: --cuda-version does not match CUDA_PATH envvar") 588 | 589 | if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): 590 | print("error: windows build requires --cuda-version and --cuda-home") 591 | 592 | if FLAGS.tensorrt_home is None: 593 | FLAGS.tensorrt_home = "/tensorrt" 594 | else: 595 | if "CUDNN_VERSION" in os.environ: 596 | if FLAGS.cudnn_home is None: 597 | FLAGS.cudnn_home = "/usr" 598 | 599 | if FLAGS.cuda_home is None: 600 | FLAGS.cuda_home = "/usr/local/cuda" 601 | 602 | if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): 603 | print("error: linux build requires --cudnn-home and --cuda-home") 604 | 605 | if FLAGS.tensorrt_home is None: 606 | if target_platform() == "rhel": 607 | if platform.machine().lower() == "aarch64": 608 | FLAGS.tensorrt_home = "/usr/local/cuda/targets/sbsa-linux/" 609 | else: 610 | FLAGS.tensorrt_home = "/usr/local/cuda/targets/x86_64-linux/" 611 | else: 612 | FLAGS.tensorrt_home = "/usr/src/tensorrt" 613 | 614 | 615 | if __name__ == "__main__": 616 | parser = argparse.ArgumentParser() 617 | 618 | parser.add_argument( 619 | "--triton-container", 620 | type=str, 621 | required=True, 622 | help="Triton base container to use for ORT build.", 623 | ) 624 | parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") 625 | parser.add_argument( 626 | "--output", type=str, required=True, help="File to write Dockerfile to." 
627 | ) 628 | parser.add_argument( 629 | "--enable-gpu", action="store_true", required=False, help="Enable GPU support" 630 | ) 631 | parser.add_argument( 632 | "--ort-build-config", 633 | type=str, 634 | default="Release", 635 | choices=["Debug", "Release", "RelWithDebInfo"], 636 | help="ORT build configuration.", 637 | ) 638 | parser.add_argument( 639 | "--target-platform", 640 | required=False, 641 | default=None, 642 | help='Target for build, can be "linux", "windows", "rhel", or "igpu". If not specified, build targets the current platform.', 643 | ) 644 | 645 | parser.add_argument( 646 | "--cuda-version", type=str, required=False, help="Version for CUDA." 647 | ) 648 | parser.add_argument( 649 | "--cuda-home", type=str, required=False, help="Home directory for CUDA." 650 | ) 651 | parser.add_argument( 652 | "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." 653 | ) 654 | parser.add_argument( 655 | "--ort-openvino", 656 | type=str, 657 | required=False, 658 | help="Enable OpenVino execution provider using specified OpenVINO version.", 659 | ) 660 | parser.add_argument( 661 | "--ort-tensorrt", 662 | action="store_true", 663 | required=False, 664 | help="Enable TensorRT execution provider.", 665 | ) 666 | parser.add_argument( 667 | "--tensorrt-home", type=str, required=False, help="Home directory for TensorRT." 668 | ) 669 | parser.add_argument( 670 | "--onnx-tensorrt-tag", type=str, default="", help="onnx-tensorrt repo tag." 671 | ) 672 | parser.add_argument("--trt-version", type=str, default="", help="TRT version.") 673 | 674 | FLAGS = parser.parse_args() 675 | if FLAGS.enable_gpu: 676 | preprocess_gpu_flags() 677 | 678 | # if a tag is provided by the user, then simply use it 679 | # if the tag is empty - check whether there is an entry in the ORT_TO_TRTPARSER_VERSION_MAP 680 | # map corresponding to ort version + trt version combo. If yes then use it 681 | # otherwise we leave it empty and use the defaults from ort 682 | if ( 683 | FLAGS.onnx_tensorrt_tag == "" 684 | and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys() 685 | ): 686 | trt_version = re.match(r"^[0-9]+\.[0-9]+", FLAGS.trt_version) 687 | if ( 688 | trt_version 689 | and trt_version.group(0) 690 | == ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][0] 691 | ): 692 | FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][1] 693 | 694 | if target_platform() == "windows": 695 | # OpenVINO EP not yet supported for windows build 696 | if FLAGS.ort_openvino is not None: 697 | print("warning: OpenVINO not supported for windows, ignoring") 698 | FLAGS.ort_openvino = None 699 | dockerfile_for_windows(FLAGS.output) 700 | else: 701 | dockerfile_for_linux(FLAGS.output) 702 | --------------------------------------------------------------------------------
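
For reference, the following is a minimal sketch of how the Dockerfile generator above might be driven end to end. The flag names are taken from the argparse definitions in tools/gen_ort_dockerfile.py; the concrete base-image tag, version numbers, output filename, and the follow-up docker build step are illustrative assumptions rather than values documented in this repository.

    #!/bin/bash
    # Hypothetical invocation of tools/gen_ort_dockerfile.py.
    # Flag names come from the script's argparse definitions; the values below
    # (container tag, ORT/OpenVINO versions, paths) are placeholders for illustration.
    python3 tools/gen_ort_dockerfile.py \
        --triton-container=nvcr.io/nvidia/tritonserver:24.01-py3-min \
        --ort-version=1.17.0 \
        --ort-build-config=Release \
        --enable-gpu \
        --cuda-home=/usr/local/cuda \
        --cudnn-home=/usr \
        --ort-tensorrt \
        --ort-openvino=2024.0.0 \
        --output=Dockerfile.ort

    # Assumed follow-up step: build an ONNX Runtime image from the generated
    # Dockerfile (the image tag is arbitrary).
    docker build -t onnxruntime-build -f Dockerfile.ort .

Note that --ort-openvino must name a key of OPENVINO_VERSION_MAP (for example 2024.0.0 above), and that on Linux preprocess_gpu_flags() falls back to /usr/local/cuda and /usr/src/tensorrt when --cuda-home and --tensorrt-home are not supplied.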