├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── codeql.yml │ ├── main.yml │ ├── ok-to-test-command.yml │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── TritonOnnxRuntimeBackendConfig.cmake.in └── download_onnxruntime.cmake ├── pyproject.toml ├── src ├── libtriton_onnxruntime.ldscript ├── onnxruntime.cc ├── onnxruntime_loader.cc ├── onnxruntime_loader.h ├── onnxruntime_utils.cc └── onnxruntime_utils.h ├── test └── initializer_as_input │ ├── README.md │ ├── generate_test_model.py │ ├── models │ └── add_with_initializer │ │ ├── 1 │ │ └── model.onnx │ │ └── config.pbtxt │ ├── test.py │ └── test.sh └── tools └── gen_ort_dockerfile.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | 4 | IndentWidth: 2 5 | ColumnLimit: 80 6 | ContinuationIndentWidth: 4 7 | UseTab: Never 8 | MaxEmptyLinesToKeep: 2 9 | 10 | SortIncludes: true 11 | CompactNamespaces: true 12 | ReflowComments: true 13 | 14 | DerivePointerAlignment: false 15 | PointerAlignment: Left 16 | 17 | AllowShortIfStatementsOnASingleLine: false 18 | AllowShortBlocksOnASingleLine: false 19 | AllowShortFunctionsOnASingleLine: Inline 20 | 21 | AlwaysBreakAfterReturnType: TopLevelDefinitions 22 | AlignAfterOpenBracket: AlwaysBreak 23 | BreakBeforeBraces: Custom 24 | BraceWrapping: 25 | AfterClass: false 26 | AfterControlStatement: false 27 | AfterEnum: false 28 | AfterFunction: true 29 | AfterNamespace: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | BeforeCatch: true 33 | 34 | BinPackArguments: true 35 | BinPackParameters: true 36 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 37 | 38 | IndentCaseLabels: true 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Description** 11 | A clear and concise description of what the bug is. 12 | 13 | **Triton Information** 14 | What version of Triton are you using? 15 | 16 | Are you using the Triton container or did you build it yourself? 17 | 18 | **To Reproduce** 19 | 20 | If the problem appears to be a bug in the execution of the model itself, first attempt to run the model directly in ONNX Runtime. What is the output from loading and running the model in ORT directly? If there is a problem running the model directly with ORT, please submit an issue in the microsoft/onnxruntime (github.com) project. 21 | 22 | If the problem appears to be in Triton itself, provide detailed steps to reproduce the behavior in Triton. 23 | 24 | Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well). 25 | 26 | **Expected behavior** 27 | A clear and concise description of what you expected to happen. 
28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "CodeQL" 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | analyze: 34 | name: Analyze 35 | runs-on: ubuntu-latest 36 | permissions: 37 | actions: read 38 | contents: read 39 | security-events: write 40 | 41 | strategy: 42 | fail-fast: false 43 | matrix: 44 | language: [ 'python' ] 45 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 46 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 47 | 48 | steps: 49 | - name: Checkout repository 50 | uses: actions/checkout@v3 51 | 52 | # Initializes the CodeQL tools for scanning. 
53 | - name: Initialize CodeQL 54 | uses: github/codeql-action/init@v2 55 | with: 56 | languages: ${{ matrix.language }} 57 | # If you wish to specify custom queries, you can do so here or in a config file. 58 | # By default, queries listed here will override any specified in a config file. 59 | # Prefix the list here with "+" to use these queries and those in the config file. 60 | 61 | # For details on CodeQL's query packs, refer to: 62 | # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 63 | queries: +security-and-quality 64 | 65 | 66 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 67 | # If this step fails, then you should remove it and run the build manually (see below). 68 | - name: Autobuild 69 | uses: github/codeql-action/autobuild@v2 70 | 71 | # Command-line programs to run using the OS shell. 72 | # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 73 | 74 | # If the Autobuild fails above, remove it, uncomment the following three lines, 75 | # and modify them (or add more) to build your code; refer to the example below for guidance. 76 | 77 | # - run: | 78 | # echo "Run, Build Application using script" 79 | # ./location_of_script_within_repo/buildscript.sh 80 | 81 | - name: Perform CodeQL Analysis 82 | uses: github/codeql-action/analyze@v2 83 | with: 84 | category: "/language:${{matrix.language}}" 85 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | issue_comment: 4 | types: [created] 5 | jobs: 6 | slashCommandDispatch: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Command Dispatch 10 | uses: peter-evans/slash-command-dispatch@v2 11 | with: 12 | token: ${{ secrets.PAT }} 13 | permission: maintain 14 | issue-type: pull-request 15 | commands: | 16 | ok-to-test 17 | -------------------------------------------------------------------------------- /.github/workflows/ok-to-test-command.yml: -------------------------------------------------------------------------------- 1 | name: ok-to-test-command 2 | on: 3 | repository_dispatch: 4 | types: [ok-to-test-command] 5 | 6 | jobs: 7 | # Repo owner has commented /ok-to-test on a pull request. For both 8 | # fork-based and trusted requests, a SHA must be supplied and is checked against 9 | # the request SHA.
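# Example trigger (illustrative): a maintainer comments on the pull request
#   /ok-to-test sha=<head-sha>
# Optional named arguments handled by the steps below include serverbranch=,
# buildargs=, skipbuild=, skipbuildqa=, tests= and skipsanity=.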
10 | buildAndTest: 11 | runs-on: [self-hosted, triton] 12 | if: 13 | (github.event.client_payload.slash_command.args.named.sha != '') && 14 | contains(github.event.client_payload.pull_request.head.sha, 15 | github.event.client_payload.slash_command.args.named.sha) 16 | steps: 17 | - name: Build Server Container 18 | if: github.event.client_payload.slash_command.args.named.skipbuild == '' 19 | run: (cd /home/runner && 20 | bash -x ./build.sh --serverbranch "${{ github.event.client_payload.slash_command.args.named.serverbranch }}" --buildargs "--enable-logging --enable-stats --enable-tracing --enable-metrics --enable-gpu-metrics --enable-gpu --endpoint=http --endpoint=grpc --backend=custom --backend=ensemble --backend=identity --backend=repeat --backend=square --backend=onnxruntime:pull/${{ github.event.client_payload.pull_request.number }}/head ${{ github.event.client_payload.slash_command.args.named.buildargs }}") 21 | 22 | - name: Build QA Container 23 | if: github.event.client_payload.slash_command.args.named.skipbuildqa == '' 24 | run: (cd /home/runner && bash -x ./buildqa.sh) 25 | 26 | - name: Run Explicit Tests 27 | id: explicit 28 | if: github.event.client_payload.slash_command.args.named.tests != '' 29 | continue-on-error: true 30 | run: (cd /home/runner && 31 | bash -x ./test.sh --tests "${{ github.event.client_payload.slash_command.args.named.tests }}") 32 | 33 | - name: Run Sanity Tests 34 | id: sanity 35 | if: github.event.client_payload.slash_command.args.named.skipsanity == '' 36 | continue-on-error: true 37 | run: (cd /home/runner && 38 | bash -x ./test.sh --backends onnx --expected 38 --tests "L0_infer" && 39 | bash -x ./test.sh --backends onnx --tests "L0_batcher" && 40 | bash -x ./test.sh --backends onnx --tests "L0_sequence_batcher") 41 | 42 | - name: Show Result 43 | uses: peter-evans/create-or-update-comment@v1 44 | with: 45 | token: ${{ secrets.PAT }} 46 | repository: ${{ github.event.client_payload.github.payload.repository.full_name }} 47 | comment-id: ${{ github.event.client_payload.github.payload.comment.id }} 48 | body: | 49 | ``` 50 | sanity ${{ toJson(steps.sanity.outputs) }} 51 | explicit ${{ toJson(steps.explicit.outputs) }} 52 | ``` 53 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: pre-commit 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | pre-commit: 34 | runs-on: ubuntu-22.04 35 | steps: 36 | - uses: actions/checkout@v3 37 | - uses: actions/setup-python@v3 38 | - uses: pre-commit/action@v3.0.0 39 | 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /.vscode 3 | *.so 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
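# The same hooks can be run locally before pushing (standard pre-commit usage,
# not specific to this repo):
#   pip install pre-commit
#   pre-commit run --all-files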
26 | 27 | repos: 28 | - repo: https://github.com/timothycrosley/isort 29 | rev: 5.12.0 30 | hooks: 31 | - id: isort 32 | additional_dependencies: [toml] 33 | - repo: https://github.com/psf/black 34 | rev: 23.1.0 35 | hooks: 36 | - id: black 37 | types_or: [python, cython] 38 | - repo: https://github.com/PyCQA/flake8 39 | rev: 5.0.4 40 | hooks: 41 | - id: flake8 42 | args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501] 43 | types_or: [python, cython] 44 | - repo: https://github.com/pre-commit/mirrors-clang-format 45 | rev: v16.0.5 46 | hooks: 47 | - id: clang-format 48 | types_or: [c, c++, cuda, proto, textproto, java] 49 | args: ["-fallback-style=none", "-style=file", "-i"] 50 | - repo: https://github.com/codespell-project/codespell 51 | rev: v2.2.4 52 | hooks: 53 | - id: codespell 54 | additional_dependencies: [tomli] 55 | args: ["--toml", "pyproject.toml"] 56 | exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) 57 | # More details about these pre-commit hooks here: 58 | # https://pre-commit.com/hooks.html 59 | - repo: https://github.com/pre-commit/pre-commit-hooks 60 | rev: v4.4.0 61 | hooks: 62 | - id: check-case-conflict 63 | - id: check-executables-have-shebangs 64 | - id: check-merge-conflict 65 | - id: check-json 66 | - id: check-toml 67 | - id: check-yaml 68 | - id: check-shebang-scripts-are-executable 69 | - id: end-of-file-fixer 70 | types_or: [c, c++, cuda, proto, textproto, java, python] 71 | - id: mixed-line-ending 72 | - id: requirements-txt-fixer 73 | - id: trailing-whitespace 74 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | cmake_minimum_required(VERSION 3.17) 28 | 29 | project(tritononnxruntimebackend LANGUAGES C CXX) 30 | 31 | # 32 | # Options 33 | # 34 | # To build the ONNX Runtime backend you must either: 35 | # 36 | # - Point to an already built ONNX Runtime using 37 | # TRITON_ONNXRUNTIME_INCLUDE_PATHS and 38 | # TRITON_ONNXRUNTIME_LIB_PATHS 39 | # 40 | # or: 41 | # 42 | # - Set TRITON_BUILD_ONNXRUNTIME_VERSION to the version of ONNX 43 | # Runtime that you want built for the backend. 44 | # 45 | # - Set TRITON_BUILD_CONTAINER to the Triton container to use as a 46 | # base for the build. On Linux you can instead set 47 | # TRITON_BUILD_CONTAINER_VERSION to the Triton version that you 48 | # want to target with the build, and the corresponding container 49 | # from NGC will be used. 50 | # 51 | # - Optionally set TRITON_BUILD_CUDA_VERSION and 52 | # TRITON_BUILD_CUDA_HOME. If not set, these are automatically set 53 | # using the standard CUDA install location. For example, on 54 | # Windows they will be set based on CUDA_PATH: 55 | # 56 | # 57 | # TRITON_BUILD_CUDA_VERSION=11.1 58 | # TRITON_BUILD_CUDA_HOME="C:\Program Files\NVIDIA GPU Computing Toolkit\v11.1" 59 | # 60 | # - If you want TensorRT support, set 61 | # TRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON and set TRITON_BUILD_TENSORRT_HOME. 62 | # 63 | # Optionally set TRITON_ONNX_TENSORRT_REPO_TAG to specify a branch in the https://github.com/onnx/onnx-tensorrt repo, 64 | # for example: 65 | # TRITON_ONNX_TENSORRT_REPO_TAG=master 66 | # This enables using a version of TensorRT which is not yet supported in the ONNXRuntime release branch. 67 | # By default we pick the default branch which comes with the requested version of onnxruntime. 68 | # 69 | # Optionally set TRT_VERSION to specify the version of TRT which is being used. 70 | # This, along with TRITON_BUILD_ONNXRUNTIME_VERSION, is used to pick the right onnx-tensorrt parser version. 71 | # When TRITON_ONNX_TENSORRT_REPO_TAG is set, TRT_VERSION is ignored. 72 | # When neither TRITON_ONNX_TENSORRT_REPO_TAG nor TRT_VERSION is set, 73 | # the default parser version which comes with ORT is picked. 74 | # 75 | # - If you want OpenVINO support, set 76 | # TRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON and set 77 | # TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION to the OpenVINO 78 | # version that is compatible with the specified version of ONNX 79 | # Runtime. 80 | # 81 | # - Optionally set TRITON_BUILD_TARGET_PLATFORM to either linux, windows or 82 | # igpu. If not set, the current platform will be used. If building on 83 | # Jetpack, always set to igpu to avoid misdetection. 84 | # 85 | # - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF. 86 | # This will cause builds with CUDA and TensorRT flags to fail.
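# A typical configure line combining the options above might look like the
# following (the versions shown are illustrative; use the combination listed
# in the TRITON_VERSION_MAP of the server repo's build.py for your release):
#
#   cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
#         -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 \
#         -DTRITON_BUILD_CONTAINER_VERSION=23.04 \
#         -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON ..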
87 | # 88 | option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON) 89 | option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON) 90 | option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT 91 | "Enable TensorRT execution provider for ONNXRuntime backend in server" OFF) 92 | option(TRITON_ENABLE_ONNXRUNTIME_OPENVINO 93 | "Enable OpenVINO execution provider for ONNXRuntime backend in server" OFF) 94 | set(TRITON_BUILD_CONTAINER "" CACHE STRING "Triton container to use a base for build") 95 | set(TRITON_BUILD_CONTAINER_VERSION "" CACHE STRING "Triton container version to target") 96 | set(TRITON_BUILD_ONNXRUNTIME_VERSION "" CACHE STRING "ONNXRuntime version to build") 97 | set(TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION "" CACHE STRING "ONNXRuntime OpenVINO version to build") 98 | set(TRITON_BUILD_TARGET_PLATFORM "" CACHE STRING "Target platform for ONNXRuntime build") 99 | set(TRITON_BUILD_CUDA_VERSION "" CACHE STRING "Version of CUDA install") 100 | set(TRITON_BUILD_CUDA_HOME "" CACHE PATH "Path to CUDA install") 101 | set(TRITON_BUILD_CUDNN_HOME "" CACHE PATH "Path to CUDNN install") 102 | set(TRITON_BUILD_TENSORRT_HOME "" CACHE PATH "Path to TensorRT install") 103 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "" CACHE PATH "Paths to ONNXRuntime includes") 104 | set(TRITON_ONNX_TENSORRT_REPO_TAG "" CACHE STRING "Tag for onnx-tensorrt repo") 105 | set(TRT_VERSION "" CACHE STRING "TRT version for this build.") 106 | set(TRITON_ONNXRUNTIME_LIB_PATHS "" CACHE PATH "Paths to ONNXRuntime libraries") 107 | 108 | set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from") 109 | set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") 110 | set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") 111 | set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") 112 | 113 | # Use C++17 standard as Triton's minimum required. 
114 | set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.") 115 | 116 | if (WIN32) 117 | if(TRITON_ENABLE_ONNXRUNTIME_OPENVINO) 118 | message(FATAL_ERROR 119 | "TRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON not supported for Windows") 120 | endif() 121 | endif() # WIN32 122 | 123 | if (NOT TRITON_ENABLE_GPU) 124 | if (TRITON_ENABLE_ONNXRUNTIME_TENSORRT) 125 | message(FATAL_ERROR "TRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON requires TRITON_ENABLE_GPU=ON") 126 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 127 | endif() # NOT TRITON_ENABLE_GPU 128 | 129 | if(NOT CMAKE_BUILD_TYPE) 130 | set(CMAKE_BUILD_TYPE Release) 131 | endif() 132 | 133 | set(TRITON_ONNXRUNTIME_DOCKER_BUILD OFF) 134 | # Download onnxruntime 135 | include(cmake/download_onnxruntime.cmake) 136 | if(TRITON_ONNXRUNTIME_LIB_PATHS STREQUAL "") 137 | set(TRITON_ONNXRUNTIME_DOCKER_BUILD ON) 138 | endif() 139 | 140 | message(STATUS "Using Onnxruntime docker: ${TRITON_ONNXRUNTIME_DOCKER_BUILD}") 141 | 142 | if(NOT TRITON_ONNXRUNTIME_DOCKER_BUILD) 143 | find_library(ONNXRUNTIME_LIBRARY NAMES onnxruntime PATHS ${TRITON_ONNXRUNTIME_LIB_PATHS} REQUIRED) 144 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 145 | find_library(OV_LIBRARY 146 | NAMES openvino 147 | PATHS ${TRITON_ONNXRUNTIME_LIB_PATHS}) 148 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 149 | 150 | else() 151 | 152 | if(NOT TRITON_BUILD_CONTAINER AND NOT TRITON_BUILD_CONTAINER_VERSION) 153 | message(FATAL_ERROR 154 | "TRITON_BUILD_ONNXRUNTIME_VERSION requires TRITON_BUILD_CONTAINER or TRITON_BUILD_CONTAINER_VERSION") 155 | endif() 156 | 157 | if(NOT TRITON_BUILD_CONTAINER) 158 | set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") 159 | endif() 160 | 161 | set(TRITON_ONNXRUNTIME_DOCKER_IMAGE "tritonserver_onnxruntime") 162 | set(TRITON_ONNXRUNTIME_DOCKER_MEMORY "$,32g,8g>") 163 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/include") 164 | set(TRITON_ONNXRUNTIME_LIB_PATHS "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/lib") 165 | if (WIN32) 166 | set(ONNXRUNTIME_LIBRARY "onnxruntime") 167 | else() 168 | set(ONNXRUNTIME_LIBRARY "libonnxruntime.so") 169 | endif() # WIN32 170 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 171 | set(OV_LIBRARY "libopenvino.so") 172 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 173 | endif() 174 | 175 | # 176 | # Dependencies 177 | # 178 | # FetchContent's composability isn't very good. We must include the 179 | # transitive closure of all repos so that we can override the tag. 
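# For example, a build can pin all three dependency repos to the same release
# branch instead of "main" (the branch name below is illustrative):
#
#   cmake -DTRITON_BACKEND_REPO_TAG=r23.04 \
#         -DTRITON_CORE_REPO_TAG=r23.04 \
#         -DTRITON_COMMON_REPO_TAG=r23.04 ..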
180 | # 181 | include(FetchContent) 182 | 183 | FetchContent_Declare( 184 | repo-common 185 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/common.git 186 | GIT_TAG ${TRITON_COMMON_REPO_TAG} 187 | GIT_SHALLOW ON 188 | ) 189 | FetchContent_Declare( 190 | repo-core 191 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git 192 | GIT_TAG ${TRITON_CORE_REPO_TAG} 193 | GIT_SHALLOW ON 194 | ) 195 | FetchContent_Declare( 196 | repo-backend 197 | GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/backend.git 198 | GIT_TAG ${TRITON_BACKEND_REPO_TAG} 199 | GIT_SHALLOW ON 200 | ) 201 | FetchContent_MakeAvailable(repo-common repo-core repo-backend) 202 | 203 | # 204 | # CUDA 205 | # 206 | if(${TRITON_ENABLE_GPU}) 207 | find_package(CUDAToolkit REQUIRED) 208 | endif() # TRITON_ENABLE_GPU 209 | 210 | # 211 | # Shared library implementing the Triton Backend API 212 | # 213 | configure_file(src/libtriton_onnxruntime.ldscript libtriton_onnxruntime.ldscript COPYONLY) 214 | 215 | add_library( 216 | triton-onnxruntime-backend SHARED 217 | src/onnxruntime.cc 218 | src/onnxruntime_loader.cc 219 | src/onnxruntime_loader.h 220 | src/onnxruntime_utils.cc 221 | src/onnxruntime_utils.h 222 | ) 223 | 224 | add_library( 225 | TritonOnnxRuntimeBackend::triton-onnxruntime-backend ALIAS triton-onnxruntime-backend 226 | ) 227 | 228 | target_include_directories( 229 | triton-onnxruntime-backend 230 | PRIVATE 231 | ${CMAKE_CURRENT_SOURCE_DIR}/src 232 | ${TRITON_ONNXRUNTIME_INCLUDE_PATHS} 233 | ) 234 | 235 | target_compile_features(triton-onnxruntime-backend PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD}) 236 | target_compile_options( 237 | triton-onnxruntime-backend PRIVATE 238 | $<$,$,$>: 239 | -Wall -Wextra -Wno-unused-parameter -Wno-type-limits> 240 | $<$:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor> 241 | ) 242 | 243 | if(${TRITON_ENABLE_GPU}) 244 | target_compile_definitions( 245 | triton-onnxruntime-backend 246 | PRIVATE TRITON_ENABLE_GPU=1 247 | ) 248 | endif() # TRITON_ENABLE_GPU 249 | if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) 250 | target_compile_definitions( 251 | triton-onnxruntime-backend 252 | PRIVATE TRITON_ENABLE_ONNXRUNTIME_TENSORRT=1 253 | ) 254 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 255 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 256 | target_compile_definitions( 257 | triton-onnxruntime-backend 258 | PRIVATE TRITON_ENABLE_ONNXRUNTIME_OPENVINO=1 259 | ) 260 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 261 | 262 | if (WIN32) 263 | set_target_properties( 264 | triton-onnxruntime-backend 265 | PROPERTIES 266 | POSITION_INDEPENDENT_CODE ON 267 | OUTPUT_NAME triton_onnxruntime 268 | SKIP_BUILD_RPATH TRUE 269 | BUILD_WITH_INSTALL_RPATH TRUE 270 | INSTALL_RPATH_USE_LINK_PATH FALSE 271 | INSTALL_RPATH "$\{ORIGIN\}" 272 | ) 273 | else () 274 | set_target_properties( 275 | triton-onnxruntime-backend 276 | PROPERTIES 277 | POSITION_INDEPENDENT_CODE ON 278 | OUTPUT_NAME triton_onnxruntime 279 | SKIP_BUILD_RPATH TRUE 280 | BUILD_WITH_INSTALL_RPATH TRUE 281 | INSTALL_RPATH_USE_LINK_PATH FALSE 282 | INSTALL_RPATH "$\{ORIGIN\}" 283 | LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_onnxruntime.ldscript 284 | LINK_FLAGS "-Wl,--version-script libtriton_onnxruntime.ldscript" 285 | ) 286 | endif() 287 | 288 | FOREACH(p ${TRITON_ONNXRUNTIME_LIB_PATHS}) 289 | target_link_directories( 290 | triton-onnxruntime-backend 291 | PRIVATE ${p} 292 | ) 293 | ENDFOREACH(p) 294 | 295 | target_link_libraries( 296 | triton-onnxruntime-backend 297 | PRIVATE 298 | triton-core-serverapi # from repo-core 299 | triton-core-backendapi # from 
repo-core 300 | triton-core-serverstub # from repo-core 301 | triton-backend-utils # from repo-backend 302 | ${TRITON_ONNXRUNTIME_LDFLAGS} 303 | ${ONNXRUNTIME_LIBRARY} 304 | ) 305 | 306 | if(${TRITON_ENABLE_GPU}) 307 | target_link_libraries( 308 | triton-onnxruntime-backend 309 | PRIVATE 310 | CUDA::cudart 311 | ) 312 | endif() # TRITON_ENABLE_GPU 313 | 314 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 315 | target_link_libraries( 316 | triton-onnxruntime-backend 317 | PRIVATE 318 | ${OV_LIBRARY} 319 | ) 320 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 321 | 322 | # 323 | # Build the ONNX Runtime libraries using docker. 324 | # 325 | if(TRITON_ONNXRUNTIME_DOCKER_BUILD) 326 | set(_GEN_FLAGS "") 327 | if(NOT ${TRITON_BUILD_TARGET_PLATFORM} STREQUAL "") 328 | set(_GEN_FLAGS ${_GEN_FLAGS} "--target-platform=${TRITON_BUILD_TARGET_PLATFORM}") 329 | endif() # TRITON_BUILD_TARGET_PLATFORM 330 | if(NOT ${TRITON_BUILD_CUDA_VERSION} STREQUAL "") 331 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cuda-version=${TRITON_BUILD_CUDA_VERSION}") 332 | endif() # TRITON_BUILD_CUDA_VERSION 333 | if(NOT ${TRITON_BUILD_CUDA_HOME} STREQUAL "") 334 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cuda-home=${TRITON_BUILD_CUDA_HOME}") 335 | endif() # TRITON_BUILD_CUDA_HOME 336 | if(NOT ${TRITON_BUILD_CUDNN_HOME} STREQUAL "") 337 | set(_GEN_FLAGS ${_GEN_FLAGS} "--cudnn-home=${TRITON_BUILD_CUDNN_HOME}") 338 | endif() # TRITON_BUILD_CUDNN_HOME 339 | if(NOT ${TRITON_BUILD_TENSORRT_HOME} STREQUAL "") 340 | set(_GEN_FLAGS ${_GEN_FLAGS} "--tensorrt-home=${TRITON_BUILD_TENSORRT_HOME}") 341 | endif() # TRITON_BUILD_TENSORRT_HOME 342 | if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) 343 | set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt") 344 | endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT 345 | if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 346 | set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}") 347 | endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO 348 | 349 | set(ENABLE_GPU_EXTRA_ARGS "") 350 | if(${TRITON_ENABLE_GPU}) 351 | set(ENABLE_GPU_EXTRA_ARGS "--enable-gpu") 352 | endif() # TRITON_ENABLE_GPU 353 | 354 | if (WIN32) 355 | add_custom_command( 356 | OUTPUT 357 | onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 358 | COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} 359 | COMMAND docker build --memory ${TRITON_ONNXRUNTIME_DOCKER_MEMORY} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} 360 | COMMAND powershell.exe -noprofile -c "docker rm onnxruntime_backend_ort > $null 2>&1; if ($LASTEXITCODE) { 'error ignored...' 
}; exit 0" 361 | COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} 362 | COMMAND rmdir /s/q onnxruntime 363 | COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime 364 | COMMAND docker rm onnxruntime_backend_ort 365 | COMMENT "Building ONNX Runtime" 366 | ) 367 | else() 368 | add_custom_command( 369 | OUTPUT 370 | onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 371 | COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} 372 | COMMAND docker build --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} 373 | COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' || true 374 | COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} 375 | COMMAND rm -fr onnxruntime 376 | COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime 377 | COMMAND docker rm onnxruntime_backend_ort 378 | COMMENT "Building ONNX Runtime" 379 | ) 380 | endif() # WIN32 381 | 382 | add_custom_target(ort_target DEPENDS onnxruntime/lib/${ONNXRUNTIME_LIBRARY}) 383 | add_library(onnxruntime-library SHARED IMPORTED GLOBAL) 384 | add_dependencies(onnxruntime-library ort_target) 385 | add_dependencies(triton-onnxruntime-backend onnxruntime-library) 386 | 387 | if (WIN32) 388 | set_target_properties( 389 | onnxruntime-library 390 | PROPERTIES 391 | IMPORTED_LOCATION onnxruntime/bin/${ONNXRUNTIME_LIBRARY} 392 | ) 393 | else() 394 | set_target_properties( 395 | onnxruntime-library 396 | PROPERTIES 397 | IMPORTED_LOCATION onnxruntime/lib/${ONNXRUNTIME_LIBRARY} 398 | ) 399 | endif() # WIN32 400 | endif() # TRITON_ONNXRUNTIME_DOCKER_BUILD 401 | 402 | # 403 | # Install 404 | # 405 | include(GNUInstallDirs) 406 | set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonOnnxRuntimeBackend) 407 | 408 | install( 409 | TARGETS 410 | triton-onnxruntime-backend 411 | EXPORT 412 | triton-onnxruntime-backend-targets 413 | LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 414 | RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 415 | ) 416 | 417 | # For Jetson, we build the onnxruntime backend once and re-use 418 | # that tar file. We copy over the libraries and other requirements 419 | # prior to running this build and therefore these set of install 420 | # commands are not needed. 
421 | if(TRITON_ONNXRUNTIME_DOCKER_BUILD OR DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 422 | install( 423 | DIRECTORY 424 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/ 425 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 426 | PATTERN *lib EXCLUDE 427 | PATTERN *bin EXCLUDE 428 | PATTERN *include EXCLUDE 429 | PATTERN *test EXCLUDE 430 | ) 431 | 432 | install( 433 | DIRECTORY 434 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/bin/ 435 | USE_SOURCE_PERMISSIONS 436 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 437 | ) 438 | 439 | if (NOT WIN32) 440 | install( 441 | DIRECTORY 442 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/lib/ 443 | USE_SOURCE_PERMISSIONS 444 | DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/onnxruntime 445 | ) 446 | 447 | install( 448 | DIRECTORY 449 | ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime/test 450 | USE_SOURCE_PERMISSIONS 451 | DESTINATION ${CMAKE_INSTALL_PREFIX} 452 | ) 453 | endif() # NOT WIN32 454 | endif() # TRITON_ONNXRUNTIME_DOCKER_BUILD 455 | 456 | install( 457 | EXPORT 458 | triton-onnxruntime-backend-targets 459 | FILE 460 | TritonOnnxRuntimeBackendTargets.cmake 461 | NAMESPACE 462 | TritonOnnxRuntimeBackend:: 463 | DESTINATION 464 | ${INSTALL_CONFIGDIR} 465 | ) 466 | 467 | include(CMakePackageConfigHelpers) 468 | configure_package_config_file( 469 | ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonOnnxRuntimeBackendConfig.cmake.in 470 | ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendConfig.cmake 471 | INSTALL_DESTINATION ${INSTALL_CONFIGDIR} 472 | ) 473 | 474 | install( 475 | FILES 476 | ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendConfig.cmake 477 | DESTINATION ${INSTALL_CONFIGDIR} 478 | ) 479 | 480 | # 481 | # Export from build tree 482 | # 483 | export( 484 | EXPORT triton-onnxruntime-backend-targets 485 | FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonOnnxRuntimeBackendTargets.cmake 486 | NAMESPACE TritonOnnxRuntimeBackend:: 487 | ) 488 | 489 | export(PACKAGE TritonOnnxRuntimeBackend) 490 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of NVIDIA CORPORATION nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) 30 | 31 | # ONNX Runtime Backend 32 | 33 | The Triton backend for the [ONNX 34 | Runtime](https://github.com/microsoft/onnxruntime). You can learn more 35 | about Triton backends in the [backend 36 | repo](https://github.com/triton-inference-server/backend). Ask 37 | questions or report problems on the [issues 38 | page](https://github.com/triton-inference-server/onnxruntime_backend/issues). 39 | 40 | Use a recent CMake to build and install in a local directory. 41 | Typically you will want to build an appropriate ONNX Runtime 42 | implementation as part of the build. You do this by specifying an ONNX 43 | Runtime version and a Triton container version that you want to use 44 | with the backend. You can find the combination of versions used in a 45 | particular Triton release in the TRITON_VERSION_MAP at the top of 46 | build.py in the branch matching the Triton release you are interested 47 | in. For example, to build the ONNX Runtime backend for Triton 23.04, 48 | use the versions from TRITON_VERSION_MAP in the [r23.04 branch of 49 | build.py](https://github.com/triton-inference-server/server/blob/r23.04/build.py#L73). 50 | 51 | ``` 52 | $ mkdir build 53 | $ cd build 54 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 -DTRITON_BUILD_CONTAINER_VERSION=23.04 .. 55 | $ make install 56 | ``` 57 | 58 | The resulting install/backends/onnxruntime directory can be added to a 59 | Triton installation as /opt/tritonserver/backends/onnxruntime. 60 | 61 | The following required Triton repositories will be pulled and used in 62 | the build. By default the "main" branch/tag will be used for each repo 63 | but the listed CMake argument can be used to override. 64 | 65 | * triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag] 66 | * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] 67 | * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] 68 | 69 | You can add TensorRT support to the ONNX Runtime backend by using 70 | -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON. You can add OpenVino support 71 | by using -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON 72 | -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=\<openvino-version\>, where 73 | \<openvino-version\> is the OpenVino version to use and should match the 74 | TRITON_VERSION_MAP entry as described above. So, to build with both 75 | TensorRT and OpenVino support: 76 | 77 | ``` 78 | $ mkdir build 79 | $ cd build 80 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.14.1 -DTRITON_BUILD_CONTAINER_VERSION=23.04 -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 ..
81 | $ make install 82 | ``` 83 | 84 | 85 | ## ONNX Runtime with TensorRT optimization 86 | TensorRT can be used in conjunction with an ONNX model to further optimize 87 | performance. To enable TensorRT optimization you must set the model configuration 88 | appropriately. There are several optimizations available for TensorRT, like 89 | selection of the compute precision and workspace size. The optimization 90 | parameters and their descriptions are as follows. 91 | 92 | 93 | * `precision_mode`: The precision used for optimization. Allowed values are "FP32", "FP16" and "INT8". The default value is "FP32". 94 | * `max_workspace_size_bytes`: The maximum GPU memory the model can use temporarily during execution. The default value is 1GB. 95 | * `int8_calibration_table_name`: Specifies the INT8 calibration table name. Applicable when precision_mode=="INT8" and the model does not contain Q/DQ nodes. If a calibration table is provided for a model with Q/DQ nodes, ORT session creation will fail. 96 | * `int8_use_native_calibration_table`: Calibration table to use. Allowed values are 1 (use the native TensorRT-generated calibration table) and 0 (use the ORT-generated calibration table). The default is 0. **Note: The latest calibration table file needs to be copied to trt_engine_cache_path before inference. Calibration tables are specific to the model and calibration data set, so whenever a new calibration table is generated, the old file in the path should be cleaned up or replaced.** 97 | * `trt_engine_cache_enable`: Enables engine caching. 98 | * `trt_engine_cache_path`: Specifies the engine cache path. 99 | 100 | To explore the usage of more parameters, follow the mapping table below and 101 | check the [ONNX Runtime doc](https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#execution-provider-options) for details. 102 | 103 | > Please link to the latest ONNX Runtime binaries in CMake or build from the 104 | [main branch of ONNX Runtime](https://github.com/microsoft/onnxruntime/tree/main) to enable the latest options.
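As a quick illustration of the parameters above, a model configuration that enables FP16 precision together with engine caching might include an accelerator block like the one below (the cache path is a placeholder; the full parameter mapping and a more complete example follow):

```
optimization { execution_accelerators {
  gpu_execution_accelerator : [ {
    name : "tensorrt"
    parameters { key: "precision_mode" value: "FP16" }
    parameters { key: "trt_engine_cache_enable" value: "true" }
    parameters { key: "trt_engine_cache_path" value: "/tmp/trt_cache" }}
  ]
}}
```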
105 | 106 | ### Parameter mapping between ONNX Runtime and Triton ONNXRuntime Backend 107 | 108 | | Key in Triton model configuration | Value in Triton model config | Corresponding TensorRT EP option in ONNX Runtime | Type | 109 | | --------------------------------- | --------------------------------------------------- | :----------------------------------------------- | :----- | 110 | | max_workspace_size_bytes | e.g: "4294967296" | trt_max_workspace_size | int | 111 | | trt_max_partition_iterations | e.g: "1000" | trt_max_partition_iterations | int | 112 | | trt_min_subgraph_size | e.g: "1" | trt_min_subgraph_size | int | 113 | | precision_mode | "FP16" | trt_fp16_enable | bool | 114 | | precision_mode | "INT8" | trt_int8_enable | bool | 115 | | int8_calibration_table_name | | trt_int8_calibration_table_name | string | 116 | | int8_use_native_calibration_table | e.g: "1" or "true", "0" or "false" | trt_int8_use_native_calibration_table | bool | 117 | | trt_dla_enable | | trt_dla_enable | bool | 118 | | trt_dla_core | e.g: "0" | trt_dla_core | int | 119 | | trt_engine_cache_enable | e.g: "1" or "true", "0" or "false" | trt_engine_cache_enable | bool | 120 | | trt_engine_cache_path | | trt_engine_cache_path | string | 121 | | trt_engine_cache_prefix | | trt_engine_cache_prefix | string | 122 | | trt_dump_subgraphs | e.g: "1" or "true", "0" or "false" | trt_dump_subgraphs | bool | 123 | | trt_force_sequential_engine_build | e.g: "1" or "true", "0" or "false" | trt_force_sequential_engine_build | bool | 124 | | trt_context_memory_sharing_enable | e.g: "1" or "true", "0" or "false" | trt_context_memory_sharing_enable | bool | 125 | | trt_layer_norm_fp32_fallback | e.g: "1" or "true", "0" or "false" | trt_layer_norm_fp32_fallback | bool | 126 | | trt_timing_cache_enable | e.g: "1" or "true", "0" or "false" | trt_timing_cache_enable | bool | 127 | | trt_timing_cache_path | | trt_timing_cache_path | string | 128 | | trt_force_timing_cache | e.g: "1" or "true", "0" or "false" | trt_force_timing_cache | bool | 129 | | trt_detailed_build_log | e.g: "1" or "true", "0" or "false" | trt_detailed_build_log | bool | 130 | | trt_build_heuristics_enable | e.g: "1" or "true", "0" or "false" | trt_build_heuristics_enable | bool | 131 | | trt_sparsity_enable | e.g: "1" or "true", "0" or "false" | trt_sparsity_enable | bool | 132 | | trt_builder_optimization_level | e.g: "3" | trt_builder_optimization_level | int | 133 | | trt_auxiliary_streams | e.g: "-1" | trt_auxiliary_streams | int | 134 | | trt_tactic_sources | e.g: "-CUDNN,+CUBLAS"; | trt_tactic_sources | string | 135 | | trt_extra_plugin_lib_paths | | trt_extra_plugin_lib_paths | string | 136 | | trt_profile_min_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_min_shapes | string | 137 | | trt_profile_max_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_max_shapes | string | 138 | | trt_profile_opt_shapes | e.g: "input1:dim1xdimd2...,input2:dim1xdim2...,..." | trt_profile_opt_shapes | string | 139 | | trt_cuda_graph_enable | e.g: "1" or "true", "0" or "false" | trt_cuda_graph_enable | bool | 140 | | trt_dump_ep_context_model | e.g: "1" or "true", "0" or "false" | trt_dump_ep_context_model | bool | 141 | | trt_ep_context_file_path | | trt_ep_context_file_path | string | 142 | | trt_ep_context_embed_mode | e.g: "1" | trt_ep_context_embed_mode | int | 143 | 144 | The section of model config file specifying these parameters will look like: 145 | 146 | ``` 147 | . 148 | . 149 | . 
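# Each "parameters" entry is a string key/value pair; the backend maps the key
# to the corresponding TensorRT EP option listed in the table above.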
150 | optimization { execution_accelerators { 151 | gpu_execution_accelerator : [ { 152 | name : "tensorrt" 153 | parameters { key: "precision_mode" value: "FP16" } 154 | parameters { key: "max_workspace_size_bytes" value: "1073741824" } 155 | parameters { key: "trt_engine_cache_enable" value: "1" }} 156 | ] 157 | }} 158 | . 159 | . 160 | . 161 | ``` 162 | 163 | ## ONNX Runtime with CUDA Execution Provider optimization 164 | When the GPU is enabled for ORT, the CUDA execution provider is enabled. If TensorRT is 165 | also enabled then the CUDA EP is treated as a fallback option (it only comes into 166 | play for nodes which TensorRT cannot execute). If TensorRT is not enabled 167 | then the CUDA EP is the primary EP which executes the models. ORT exposes 168 | configuration options for the CUDA EP to further optimize based on the specific model 169 | and user scenario. There are several optimizations available; please refer to 170 | the [ONNX Runtime doc](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider) 171 | for more details. To enable CUDA EP optimization you must set the model 172 | configuration appropriately: 173 | 174 | ``` 175 | optimization { execution_accelerators { 176 | gpu_execution_accelerator : [ { 177 | name : "cuda" 178 | parameters { key: "cudnn_conv_use_max_workspace" value: "0" } 179 | parameters { key: "use_ep_level_unified_stream" value: "1" }} 180 | ] 181 | }} 182 | ``` 183 | 184 | ### Deprecated Parameters 185 | Specifying these parameters as shown below is deprecated. For 186 | backward compatibility they are still supported, but please use the 187 | method above to specify the parameters. 188 | 189 | * `cudnn_conv_algo_search`: CUDA convolution algorithm search configuration. 190 | Available options are: 0 - EXHAUSTIVE (expensive exhaustive benchmarking using 191 | cudnnFindConvolutionForwardAlgorithmEx; this is the default option), 192 | 1 - HEURISTIC (lightweight heuristic-based search using 193 | cudnnGetConvolutionForwardAlgorithm_v7), 2 - DEFAULT (default algorithm using 194 | CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM). 195 | 196 | * `gpu_mem_limit`: CUDA memory limit. To use all possible memory, pass in the maximum 197 | size_t value. Defaults to SIZE_MAX. 198 | 199 | * `arena_extend_strategy`: Strategy used to grow the memory arena. Available 200 | options are: 0 = kNextPowerOfTwo, 1 = kSameAsRequested. Defaults to 0. 201 | 202 | * `do_copy_in_default_stream`: Flag indicating if copying needs to take place on 203 | the same stream as the compute stream in the CUDA EP. Available options are: 204 | 0 = Use separate streams for copying and compute, 1 = Use the same stream for 205 | copying and compute. Defaults to 1. 206 | 207 | In the model config file, specifying these parameters will look like: 208 | 209 | ``` 210 | . 211 | . 212 | . 213 | parameters { key: "cudnn_conv_algo_search" value: { string_value: "0" } } 214 | parameters { key: "gpu_mem_limit" value: { string_value: "4294967200" } } 215 | . 216 | . 217 | . 218 | 219 | ``` 220 | 221 | 222 | ## ONNX Runtime with OpenVINO optimization 223 | 224 | [OpenVINO](https://docs.openvinotoolkit.org/latest/index.html) can be 225 | used in conjunction with an ONNX model to further optimize 226 | performance. To enable OpenVINO optimization you must set the model 227 | configuration as shown below. 228 | 229 | ``` 230 | . 231 | . 232 | .
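# The OpenVINO EP is configured as a cpu_execution_accelerator entry, unlike
# the TensorRT and CUDA examples above, which use gpu_execution_accelerator.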
233 | optimization { execution_accelerators { 234 | cpu_execution_accelerator : [ { 235 | name : "openvino" 236 | } ] 237 | }} 238 | . 239 | . 240 | . 241 | ``` 242 | 243 | ## Other Optimization Options with ONNX Runtime 244 | 245 | Details regarding when to use these options and what to expect from them can be 246 | found [here](https://onnxruntime.ai/docs/performance/tune-performance.html). 247 | 248 | ### Model Config Options 249 | * `intra_op_thread_count`: Sets the number of threads used to parallelize the 250 | execution within nodes. A value of 0 means ORT will pick a default, which is 251 | the number of cores. 252 | * `inter_op_thread_count`: Sets the number of threads used to parallelize the 253 | execution of the graph (across nodes). If sequential execution is enabled this 254 | value is ignored. 255 | A value of 0 means ORT will pick a default, which is the number of cores. 256 | * `execution_mode`: Controls whether operators in the graph are executed 257 | sequentially or in parallel. Usually when the model has many branches, setting 258 | this option to 1, i.e. "parallel", will give better performance. The default is 259 | 0, i.e. "sequential execution." 260 | * `level`: Refers to the graph optimization level. By default all optimizations 261 | are enabled. Allowed values are -1, 1 and 2. -1 refers to BASIC optimizations, 262 | 1 refers to basic plus extended optimizations like fusions, and 2 refers to all 263 | optimizations being disabled. Please find the details 264 | [here](https://onnxruntime.ai/docs/performance/graph-optimizations.html). 265 | 266 | ``` 267 | optimization { 268 | graph : { 269 | level : 1 270 | }} 271 | 272 | parameters { key: "intra_op_thread_count" value: { string_value: "0" } } 273 | parameters { key: "execution_mode" value: { string_value: "0" } } 274 | parameters { key: "inter_op_thread_count" value: { string_value: "0" } } 275 | 276 | ``` 277 | * `enable_mem_arena`: Use 1 to enable the arena and 0 to disable. See 278 | [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a0bbd62df2b3c119636fba89192240593) 279 | for more information. 280 | * `enable_mem_pattern`: Use 1 to enable memory pattern and 0 to disable. 281 | See [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#ad13b711736956bf0565fea0f8d7a5d75) 282 | for more information. 283 | * `memory.enable_memory_arena_shrinkage`: 284 | See [this](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h) 285 | for more information. 286 | * `session.use_device_allocator_for_initializers`: Use "1" to enable using the device allocator for allocating initialized tensor memory and "0" to disable. The default is "0". See [this](https://onnxruntime.ai/docs/get-started/with-c.html) for more information. 287 | 288 | ### Command line options 289 | 290 | #### Thread Pools 291 | 292 | When the intra and inter op thread counts are set to 0 or to a value higher than 1, 293 | ORT by default creates a threadpool per session. This may not be ideal in every scenario, 294 | so ORT also supports global threadpools. When global threadpools are 295 | enabled, ORT creates one global threadpool which is shared by every session. 296 | Use the backend config to enable the global threadpool. When the global threadpool is 297 | enabled, the intra and inter op num threads config should also be provided via the 298 | backend config; config values provided in the model config will be ignored.
299 | 300 | ``` 301 | --backend-config=onnxruntime,enable-global-threadpool=<0,1>, --backend-config=onnxruntime,intra_op_thread_count=<int>, --backend-config=onnxruntime,inter_op_thread_count=<int> 302 | ``` 303 | 304 | #### Default Max Batch Size 305 | 306 | The default-max-batch-size value is used for max_batch_size during 307 | [Autocomplete](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#auto-generated-model-configuration) 308 | when no other value is found. Assuming the server was not launched with the 309 | `--disable-auto-complete-config` command-line option, the onnxruntime backend 310 | will set the max_batch_size of the model to this default value under the 311 | following conditions: 312 | 313 | 1. Autocomplete has determined the model is capable of batching requests. 314 | 2. max_batch_size is 0 in the model configuration or max_batch_size 315 | is omitted from the model configuration. 316 | 317 | If max_batch_size > 1 and no 318 | [scheduler](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#scheduling-and-batching) 319 | is provided, the dynamic batch scheduler will be used. 320 | 321 | ``` 322 | --backend-config=onnxruntime,default-max-batch-size=<int> 323 | ``` 324 | 325 | The default value of `default-max-batch-size` is 4. 326 | -------------------------------------------------------------------------------- /cmake/TritonOnnxRuntimeBackendConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED.
26 | 27 | include(CMakeFindDependencyMacro) 28 | 29 | get_filename_component( 30 | TRITONONNXRUNTIMEBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH 31 | ) 32 | 33 | list(APPEND CMAKE_MODULE_PATH ${TRITONONNXRUNTIMEBACKEND_CMAKE_DIR}) 34 | 35 | if(NOT TARGET TritonOnnxRuntimeBackend::triton-onnxruntime-backend) 36 | include("${TRITONONNXRUNTIMEBACKEND_CMAKE_DIR}/TritonOnnxRuntimeBackendTargets.cmake") 37 | endif() 38 | 39 | set(TRITONONNXRUNTIMEBACKEND_LIBRARIES TritonOnnxRuntimeBackend::triton-onnxruntime-backend) 40 | -------------------------------------------------------------------------------- /cmake/download_onnxruntime.cmake: -------------------------------------------------------------------------------- 1 | if(DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 2 | 3 | set(DOWNLOAD_PATH "${CMAKE_BINARY_DIR}/_deps/downloads/onnxruntime.zip") 4 | set(EXTRACT_DIR "${CMAKE_BINARY_DIR}/onnxruntime") 5 | 6 | message(NOTICE "Downloading onnxruntime: ${TRITON_ONNXRUNTIME_PACKAGE_URL}") 7 | 8 | file(DOWNLOAD ${TRITON_ONNXRUNTIME_PACKAGE_URL} ${DOWNLOAD_PATH} SHOW_PROGRESS STATUS DOWNLOAD_STATUS) 9 | 10 | # file(DOWNLOAD ... STATUS DOWNLOAD_STATUS) returns a list with 2 elements 11 | list(GET DOWNLOAD_STATUS 0 DOWNLOAD_RESULT) 12 | 13 | if(NOT DOWNLOAD_RESULT EQUAL 0) 14 | message(NOTICE "Failed to download: ${TRITON_ONNXRUNTIME_PACKAGE_URL}") 15 | else() 16 | message(NOTICE "Download successful: ${DOWNLOAD_PATH}" ) 17 | 18 | file(ARCHIVE_EXTRACT INPUT ${DOWNLOAD_PATH} DESTINATION ${EXTRACT_DIR} VERBOSE ) 19 | 20 | file(READ "${EXTRACT_DIR}/VERSION_NUMBER" DOWNLOADED_ONNXRUNTIME_VERSION) 21 | if(${DOWNLOADED_ONNXRUNTIME_VERSION} VERSION_EQUAL ${TRITON_BUILD_ONNXRUNTIME_VERSION}) 22 | message(NOTICE "Downloaded onnxruntime version: ${DOWNLOADED_ONNXRUNTIME_VERSION}") 23 | set(TRITON_ONNXRUNTIME_INCLUDE_PATHS ${EXTRACT_DIR}/include) 24 | set(TRITON_ONNXRUNTIME_LIB_PATHS ${EXTRACT_DIR}/lib) 25 | else() 26 | message(NOTICE "Downloaded onnxruntime version: ${DOWNLOADED_ONNXRUNTIME_VERSION} does not match the required version: ${TRITON_BUILD_ONNXRUNTIME_VERSION}") 27 | endif() 28 | 29 | endif(NOT DOWNLOAD_RESULT EQUAL 0) 30 | 31 | endif(DEFINED TRITON_ONNXRUNTIME_PACKAGE_URL) 32 | 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | [tool.codespell] 28 | # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - 29 | # this is only to allow you to run codespell interactively 30 | skip = "./.git,./.github" 31 | # ignore short words, and typename parameters like OffsetT 32 | ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" 33 | # use the 'clear' dictionary for unambiguous spelling mistakes 34 | builtin = "clear" 35 | # disable warnings about binary files and wrong encoding 36 | quiet-level = 3 37 | 38 | [tool.isort] 39 | profile = "black" 40 | use_parentheses = true 41 | multi_line_output = 3 42 | include_trailing_comma = true 43 | force_grid_wrap = 0 44 | ensure_newline_before_comments = true 45 | line_length = 88 46 | balanced_wrapping = true 47 | indent = " " 48 | skip = ["build"] 49 | 50 | -------------------------------------------------------------------------------- /src/libtriton_onnxruntime.ldscript: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | { 27 | global: 28 | TRITONBACKEND_*; 29 | local: *; 30 | }; 31 | -------------------------------------------------------------------------------- /src/onnxruntime_loader.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "onnxruntime_loader.h" 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "onnxruntime_utils.h" 36 | 37 | namespace triton { namespace backend { namespace onnxruntime { 38 | 39 | std::unique_ptr OnnxLoader::loader = nullptr; 40 | 41 | OnnxLoader::~OnnxLoader() 42 | { 43 | if (env_ != nullptr) { 44 | ort_api->ReleaseEnv(env_); 45 | } 46 | } 47 | 48 | TRITONSERVER_Error* 49 | OnnxLoader::Init(common::TritonJson::Value& backend_config) 50 | { 51 | if (loader == nullptr) { 52 | OrtEnv* env; 53 | // If needed, provide custom logger with 54 | // ort_api->CreateEnvWithCustomLogger() 55 | OrtStatus* status; 56 | OrtLoggingLevel logging_level = 57 | TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE) 58 | ? ORT_LOGGING_LEVEL_VERBOSE 59 | : TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_WARN) 60 | ? ORT_LOGGING_LEVEL_WARNING 61 | : ORT_LOGGING_LEVEL_ERROR; 62 | 63 | // Controls whether to enable global threadpool which will be shared across 64 | // sessions. Use this in conjunction with DisablePerSessionThreads API or 65 | // else the session will use it's own thread pool. 66 | bool global_threadpool_enabled = false; 67 | OrtThreadingOptions* threading_options = nullptr; 68 | 69 | // Read backend config 70 | triton::common::TritonJson::Value cmdline; 71 | if (backend_config.Find("cmdline", &cmdline)) { 72 | triton::common::TritonJson::Value value; 73 | std::string value_str; 74 | if (cmdline.Find("enable-global-threadpool", &value)) { 75 | RETURN_IF_ERROR(value.AsString(&value_str)); 76 | RETURN_IF_ERROR(ParseBoolValue(value_str, &global_threadpool_enabled)); 77 | 78 | if (global_threadpool_enabled) { 79 | // If provided by user, read intra and inter op num thread 80 | // configuration and set ThreadingOptions accordingly. If not, we use 81 | // default 0 which means value equal to number of cores will be used. 
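// The cmdline keys consulted here ("enable-global-threadpool", "intra_op_thread_count",
// "inter_op_thread_count") are populated from Triton's
// --backend-config=onnxruntime,<key>=<value> command-line flags, as described in the
// README's "Thread Pools" section.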
82 | RETURN_IF_ORT_ERROR( 83 | ort_api->CreateThreadingOptions(&threading_options)); 84 | if (cmdline.Find("intra_op_thread_count", &value)) { 85 | int intra_op_num_threads = 0; 86 | RETURN_IF_ERROR(value.AsString(&value_str)); 87 | RETURN_IF_ERROR(ParseIntValue(value_str, &intra_op_num_threads)); 88 | if (intra_op_num_threads > 0) { 89 | RETURN_IF_ORT_ERROR(ort_api->SetGlobalIntraOpNumThreads( 90 | threading_options, intra_op_num_threads)); 91 | } 92 | } 93 | if (cmdline.Find("inter_op_thread_count", &value)) { 94 | int inter_op_num_threads = 0; 95 | RETURN_IF_ERROR(value.AsString(&value_str)); 96 | RETURN_IF_ERROR(ParseIntValue(value_str, &inter_op_num_threads)); 97 | if (inter_op_num_threads > 0) { 98 | RETURN_IF_ORT_ERROR(ort_api->SetGlobalInterOpNumThreads( 99 | threading_options, inter_op_num_threads)); 100 | } 101 | } 102 | } 103 | } 104 | } 105 | 106 | if (global_threadpool_enabled && threading_options != nullptr) { 107 | status = ort_api->CreateEnvWithGlobalThreadPools( 108 | logging_level, "log", threading_options, &env); 109 | ort_api->ReleaseThreadingOptions(threading_options); 110 | } else { 111 | status = ort_api->CreateEnv(logging_level, "log", &env); 112 | } 113 | 114 | loader.reset(new OnnxLoader(env, global_threadpool_enabled)); 115 | RETURN_IF_ORT_ERROR(status); 116 | } else { 117 | return TRITONSERVER_ErrorNew( 118 | TRITONSERVER_ERROR_ALREADY_EXISTS, 119 | "OnnxLoader singleton already initialized"); 120 | } 121 | 122 | return nullptr; // success 123 | } 124 | 125 | void 126 | OnnxLoader::TryRelease(bool decrement_session_cnt) 127 | { 128 | std::unique_ptr lloader; 129 | { 130 | std::lock_guard lk(loader->mu_); 131 | if (decrement_session_cnt) { 132 | loader->live_session_cnt_--; 133 | } 134 | 135 | if (loader->closing_ && (loader->live_session_cnt_ == 0)) { 136 | lloader.swap(loader); 137 | } 138 | } 139 | } 140 | 141 | TRITONSERVER_Error* 142 | OnnxLoader::Stop() 143 | { 144 | if (loader != nullptr) { 145 | loader->closing_ = true; 146 | TryRelease(false); 147 | } else { 148 | return TRITONSERVER_ErrorNew( 149 | TRITONSERVER_ERROR_UNAVAILABLE, 150 | "OnnxLoader singleton has not been initialized"); 151 | } 152 | 153 | return nullptr; // success 154 | } 155 | 156 | bool 157 | OnnxLoader::IsGlobalThreadPoolEnabled() 158 | { 159 | if (loader != nullptr) { 160 | return loader->global_threadpool_enabled_; 161 | } 162 | 163 | return false; 164 | } 165 | 166 | TRITONSERVER_Error* 167 | OnnxLoader::LoadSession( 168 | const bool is_path, const std::string& model, 169 | const OrtSessionOptions* session_options, OrtSession** session) 170 | { 171 | #ifdef _WIN32 172 | std::wstring_convert> converter; 173 | std::wstring ort_style_model_str = converter.from_bytes(model); 174 | #else 175 | const auto& ort_style_model_str = model; 176 | #endif 177 | if (loader != nullptr) { 178 | { 179 | std::lock_guard lk(loader->mu_); 180 | if (loader->closing_) { 181 | return TRITONSERVER_ErrorNew( 182 | TRITONSERVER_ERROR_UNAVAILABLE, "OnnxLoader has been stopped"); 183 | } else { 184 | loader->live_session_cnt_++; 185 | } 186 | } 187 | 188 | OrtStatus* status = nullptr; 189 | { 190 | // [FIXME] Remove lock when ORT create session is thread safe [DLIS-4663] 191 | static std::mutex ort_create_session_mu; 192 | std::lock_guard ort_lk(ort_create_session_mu); 193 | 194 | if (!is_path) { 195 | status = ort_api->CreateSessionFromArray( 196 | loader->env_, ort_style_model_str.c_str(), model.size(), 197 | session_options, session); 198 | } else { 199 | status = ort_api->CreateSession( 200 | 
loader->env_, ort_style_model_str.c_str(), session_options, 201 | session); 202 | } 203 | } 204 | 205 | if (status != nullptr) { 206 | TryRelease(true); 207 | } 208 | RETURN_IF_ORT_ERROR(status); 209 | } else { 210 | return TRITONSERVER_ErrorNew( 211 | TRITONSERVER_ERROR_UNAVAILABLE, 212 | "OnnxLoader singleton has not been initialized"); 213 | } 214 | 215 | return nullptr; // success 216 | } 217 | 218 | TRITONSERVER_Error* 219 | OnnxLoader::UnloadSession(OrtSession* session) 220 | { 221 | if (loader != nullptr) { 222 | ort_api->ReleaseSession(session); 223 | TryRelease(true); 224 | } else { 225 | return TRITONSERVER_ErrorNew( 226 | TRITONSERVER_ERROR_UNAVAILABLE, 227 | "OnnxLoader singleton has not been initialized"); 228 | } 229 | 230 | return nullptr; // success 231 | } 232 | 233 | }}} // namespace triton::backend::onnxruntime 234 | -------------------------------------------------------------------------------- /src/onnxruntime_loader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | #pragma once 27 | 28 | #include 29 | 30 | #include 31 | #include 32 | 33 | #include "triton/backend/backend_common.h" 34 | #include "triton/core/tritonbackend.h" 35 | 36 | namespace triton { namespace backend { namespace onnxruntime { 37 | 38 | /// A singleton to load Onnx model because loading models requires 39 | /// Onnx Runtime environment which is unique per process 40 | class OnnxLoader { 41 | public: 42 | ~OnnxLoader(); 43 | 44 | /// Initialize loader with default environment settings 45 | static TRITONSERVER_Error* Init(common::TritonJson::Value& backend_config); 46 | 47 | /// Stop loader, and once all Onnx sessions are unloaded via UnloadSession() 48 | /// the resource it allocated will be released 49 | static TRITONSERVER_Error* Stop(); 50 | 51 | /// Load a Onnx model from a path and return the corresponding 52 | /// OrtSession. 
53 | /// 54 | /// \param bool is_path If true 'model' is a path to the model file, 55 | /// if false 'model' is the serialized model. 56 | /// \param model The Onnx model or path to the model. 57 | /// \param session_options The options to use when creating the session 58 | /// \param session Returns the Onnx model session 59 | /// \return Error status. 60 | static TRITONSERVER_Error* LoadSession( 61 | const bool is_path, const std::string& model, 62 | const OrtSessionOptions* session_options, OrtSession** session); 63 | 64 | /// Unload a Onnx model session 65 | /// 66 | /// \param session The Onnx model session to be unloaded 67 | static TRITONSERVER_Error* UnloadSession(OrtSession* session); 68 | 69 | /// Returns whether global thread pool is enabled. 70 | /// If the loader is not initialized it returns false. 71 | static bool IsGlobalThreadPoolEnabled(); 72 | 73 | private: 74 | OnnxLoader(OrtEnv* env, bool enable_global_threadpool = false) 75 | : env_(env), global_threadpool_enabled_(enable_global_threadpool), 76 | live_session_cnt_(0), closing_(false) 77 | { 78 | } 79 | 80 | /// Decrease 'live_session_cnt_' if 'decrement_session_cnt' is true, and then 81 | /// release Onnx Runtime environment if it is closing and no live sessions 82 | /// 83 | /// \param decrement_session_cnt Whether to decrease the 'live_session_cnt_' 84 | static void TryRelease(bool decrement_session_cnt); 85 | 86 | static std::unique_ptr loader; 87 | 88 | OrtEnv* env_; 89 | bool global_threadpool_enabled_; 90 | std::mutex mu_; 91 | size_t live_session_cnt_; 92 | bool closing_; 93 | }; 94 | 95 | }}} // namespace triton::backend::onnxruntime 96 | -------------------------------------------------------------------------------- /src/onnxruntime_utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "onnxruntime_utils.h" 28 | 29 | namespace triton { namespace backend { namespace onnxruntime { 30 | 31 | const OrtApi* ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION); 32 | 33 | namespace { 34 | 35 | std::string 36 | OnnxTypeName(ONNXType onnx_type) 37 | { 38 | switch (onnx_type) { 39 | case ONNX_TYPE_TENSOR: 40 | return "ONNX_TYPE_TENSOR"; 41 | case ONNX_TYPE_SEQUENCE: 42 | return "ONNX_TYPE_SEQUENCE"; 43 | case ONNX_TYPE_MAP: 44 | return "ONNX_TYPE_MAP"; 45 | case ONNX_TYPE_OPAQUE: 46 | return "ONNX_TYPE_OPAQUE"; 47 | case ONNX_TYPE_SPARSETENSOR: 48 | return "ONNX_TYPE_SPARSETENSOR"; 49 | case ONNX_TYPE_UNKNOWN: 50 | default: 51 | break; 52 | } 53 | 54 | return "ONNX_TYPE_UNKNOWN"; 55 | } 56 | 57 | enum class NameType { 58 | INPUT, 59 | OUTPUT, 60 | INITIALIZER, 61 | }; 62 | 63 | TRITONSERVER_Error* 64 | InputOutputInitializerNames( 65 | OrtSession* session, NameType type, std::set& names) 66 | { 67 | names.clear(); 68 | 69 | size_t num_nodes; 70 | switch (type) { 71 | case NameType::INPUT: 72 | RETURN_IF_ORT_ERROR(ort_api->SessionGetInputCount(session, &num_nodes)); 73 | break; 74 | case NameType::OUTPUT: 75 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOutputCount(session, &num_nodes)); 76 | break; 77 | case NameType::INITIALIZER: 78 | RETURN_IF_ORT_ERROR( 79 | ort_api->SessionGetOverridableInitializerCount(session, &num_nodes)); 80 | break; 81 | } 82 | 83 | // iterate over all input / output nodes 84 | OrtAllocator* allocator; 85 | RETURN_IF_ORT_ERROR(ort_api->GetAllocatorWithDefaultOptions(&allocator)); 86 | OrtStatus* onnx_status = nullptr; 87 | for (size_t i = 0; i < num_nodes; i++) { 88 | char* node_name = nullptr; 89 | switch (type) { 90 | case NameType::INPUT: 91 | onnx_status = 92 | ort_api->SessionGetInputName(session, i, allocator, &node_name); 93 | break; 94 | case NameType::OUTPUT: 95 | onnx_status = 96 | ort_api->SessionGetOutputName(session, i, allocator, &node_name); 97 | break; 98 | case NameType::INITIALIZER: 99 | onnx_status = ort_api->SessionGetOverridableInitializerName( 100 | session, i, allocator, &node_name); 101 | break; 102 | } 103 | 104 | // Make a std::string copy of the name and then free 'node_name' 105 | // since the ORT API makes us responsible for doing that. 
106 | std::string name(node_name); 107 | auto free_status = ort_api->AllocatorFree(allocator, node_name); 108 | if (free_status != nullptr) { 109 | LOG_MESSAGE( 110 | TRITONSERVER_LOG_ERROR, 111 | (std::string("onnx runtime allocator free error:") + 112 | std::to_string(ort_api->GetErrorCode(free_status)) + 113 | ort_api->GetErrorMessage(free_status)) 114 | .c_str()); 115 | ort_api->ReleaseStatus(free_status); 116 | } 117 | 118 | if (onnx_status != nullptr) { 119 | break; 120 | } 121 | 122 | names.emplace(std::move(name)); 123 | } 124 | RETURN_IF_ORT_ERROR(onnx_status); 125 | 126 | return nullptr; // success 127 | } 128 | 129 | TRITONSERVER_Error* 130 | InputOutputInitializerInfos( 131 | OrtSession* session, OrtAllocator* allocator, NameType type, 132 | OnnxTensorInfoMap& infos) 133 | { 134 | infos.clear(); 135 | 136 | size_t num_nodes; 137 | switch (type) { 138 | case NameType::INPUT: 139 | RETURN_IF_ORT_ERROR(ort_api->SessionGetInputCount(session, &num_nodes)); 140 | break; 141 | case NameType::OUTPUT: 142 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOutputCount(session, &num_nodes)); 143 | break; 144 | case NameType::INITIALIZER: 145 | RETURN_IF_ORT_ERROR( 146 | ort_api->SessionGetOverridableInitializerCount(session, &num_nodes)); 147 | break; 148 | } 149 | 150 | // iterate over all nodes 151 | for (size_t i = 0; i < num_nodes; i++) { 152 | char* cname = nullptr; 153 | switch (type) { 154 | case NameType::INPUT: 155 | RETURN_IF_ORT_ERROR( 156 | ort_api->SessionGetInputName(session, i, allocator, &cname)); 157 | break; 158 | case NameType::OUTPUT: 159 | RETURN_IF_ORT_ERROR( 160 | ort_api->SessionGetOutputName(session, i, allocator, &cname)); 161 | break; 162 | case NameType::INITIALIZER: 163 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOverridableInitializerName( 164 | session, i, allocator, &cname)); 165 | break; 166 | } 167 | 168 | // Make a std::string copy of the name and then free 'cname' since 169 | // the ORT API makes us responsible for doing that. 
170 | std::string name(cname); 171 | auto free_status = ort_api->AllocatorFree(allocator, cname); 172 | if (free_status != nullptr) { 173 | LOG_MESSAGE( 174 | TRITONSERVER_LOG_ERROR, 175 | (std::string("onnx runtime allocator free error:") + 176 | std::to_string(ort_api->GetErrorCode(free_status)) + 177 | ort_api->GetErrorMessage(free_status)) 178 | .c_str()); 179 | ort_api->ReleaseStatus(free_status); 180 | } 181 | 182 | OrtTypeInfo* typeinfo; 183 | switch (type) { 184 | case NameType::INPUT: 185 | RETURN_IF_ORT_ERROR( 186 | ort_api->SessionGetInputTypeInfo(session, i, &typeinfo)); 187 | break; 188 | case NameType::OUTPUT: 189 | RETURN_IF_ORT_ERROR( 190 | ort_api->SessionGetOutputTypeInfo(session, i, &typeinfo)); 191 | break; 192 | case NameType::INITIALIZER: 193 | RETURN_IF_ORT_ERROR(ort_api->SessionGetOverridableInitializerTypeInfo( 194 | session, i, &typeinfo)); 195 | break; 196 | } 197 | 198 | std::unique_ptr typeinfo_wrapper(typeinfo); 199 | 200 | ONNXType onnx_type; 201 | RETURN_IF_ORT_ERROR(ort_api->GetOnnxTypeFromTypeInfo(typeinfo, &onnx_type)); 202 | RETURN_ERROR_IF_TRUE( 203 | onnx_type != ONNX_TYPE_TENSOR, TRITONSERVER_ERROR_UNSUPPORTED, 204 | std::string("Unsupported ONNX Type '") + OnnxTypeName(onnx_type) + 205 | "' for I/O '" + name + "', expected '" + 206 | OnnxTypeName(ONNX_TYPE_TENSOR) + "'."); 207 | 208 | const OrtTensorTypeAndShapeInfo* tensor_info; 209 | RETURN_IF_ORT_ERROR( 210 | ort_api->CastTypeInfoToTensorInfo(typeinfo, &tensor_info)); 211 | 212 | ONNXTensorElementDataType type; 213 | RETURN_IF_ORT_ERROR(ort_api->GetTensorElementType(tensor_info, &type)); 214 | 215 | size_t num_dims; 216 | RETURN_IF_ORT_ERROR(ort_api->GetDimensionsCount(tensor_info, &num_dims)); 217 | 218 | std::vector dims(num_dims); 219 | RETURN_IF_ORT_ERROR( 220 | ort_api->GetDimensions(tensor_info, (int64_t*)dims.data(), num_dims)); 221 | 222 | infos.emplace(std::move(name), OnnxTensorInfo(type, dims)); 223 | } 224 | 225 | return nullptr; // success 226 | } 227 | 228 | } // namespace 229 | 230 | std::string 231 | OnnxDataTypeName(ONNXTensorElementDataType onnx_type) 232 | { 233 | switch (onnx_type) { 234 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: 235 | return "FLOAT"; 236 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: 237 | return "UINT8"; 238 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: 239 | return "INT8"; 240 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: 241 | return "UINT16"; 242 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: 243 | return "INT16"; 244 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: 245 | return "INT32"; 246 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: 247 | return "INT64"; 248 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: 249 | return "STRING"; 250 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: 251 | return "BOOL"; 252 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: 253 | return "FLOAT16"; 254 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: 255 | return "DOUBLE"; 256 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: 257 | return "UINT32"; 258 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: 259 | return "UINT64"; 260 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: 261 | return "COMPLEX64"; 262 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: 263 | return "COMPLEX64"; 264 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: 265 | return "BFLOAT16"; 266 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED: 267 | default: 268 | break; 269 | } 270 | 271 | return "UNDEFINED"; 272 | } 273 | 274 | TRITONSERVER_DataType 275 | ConvertFromOnnxDataType(ONNXTensorElementDataType onnx_type) 276 | { 277 | switch (onnx_type) { 278 
| case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: 279 | // maps to c type float (4 bytes) 280 | return TRITONSERVER_TYPE_FP32; 281 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: 282 | return TRITONSERVER_TYPE_UINT8; 283 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: 284 | return TRITONSERVER_TYPE_INT8; 285 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: 286 | return TRITONSERVER_TYPE_UINT16; 287 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: 288 | return TRITONSERVER_TYPE_INT16; 289 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: 290 | return TRITONSERVER_TYPE_INT32; 291 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: 292 | return TRITONSERVER_TYPE_INT64; 293 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: 294 | return TRITONSERVER_TYPE_BYTES; 295 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: 296 | return TRITONSERVER_TYPE_BOOL; 297 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: 298 | return TRITONSERVER_TYPE_FP16; 299 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: 300 | // maps to c type double (8 bytes) 301 | return TRITONSERVER_TYPE_FP64; 302 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: 303 | return TRITONSERVER_TYPE_UINT32; 304 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: 305 | return TRITONSERVER_TYPE_UINT64; 306 | // The following types are not supported: 307 | // complex with float32 real and imaginary components 308 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: 309 | // complex with float64 real and imaginary components 310 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: 311 | // Non-IEEE floating-point format based on IEEE754 single-precision 312 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: 313 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED: 314 | default: 315 | break; 316 | } 317 | 318 | return TRITONSERVER_TYPE_INVALID; 319 | } 320 | 321 | ONNXTensorElementDataType 322 | ConvertToOnnxDataType(TRITONSERVER_DataType data_type) 323 | { 324 | switch (data_type) { 325 | case TRITONSERVER_TYPE_UINT8: 326 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; 327 | case TRITONSERVER_TYPE_UINT16: 328 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; 329 | case TRITONSERVER_TYPE_UINT32: 330 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; 331 | case TRITONSERVER_TYPE_UINT64: 332 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; 333 | case TRITONSERVER_TYPE_INT8: 334 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; 335 | case TRITONSERVER_TYPE_INT16: 336 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; 337 | case TRITONSERVER_TYPE_INT32: 338 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; 339 | case TRITONSERVER_TYPE_INT64: 340 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; 341 | case TRITONSERVER_TYPE_FP16: 342 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; 343 | case TRITONSERVER_TYPE_FP32: 344 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; 345 | case TRITONSERVER_TYPE_FP64: 346 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; 347 | case TRITONSERVER_TYPE_BYTES: 348 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; 349 | case TRITONSERVER_TYPE_BOOL: 350 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; 351 | default: 352 | break; 353 | } 354 | 355 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 356 | } 357 | 358 | ONNXTensorElementDataType 359 | ConvertToOnnxDataType(const std::string& data_type_str) 360 | { 361 | TRITONSERVER_DataType data_type = 362 | TRITONSERVER_StringToDataType(data_type_str.c_str()); 363 | return ConvertToOnnxDataType(data_type); 364 | } 365 | 366 | ONNXTensorElementDataType 367 | ModelConfigDataTypeToOnnxDataType(const std::string& data_type_str) 368 | { 369 | // Must start with "TYPE_". 
370 | if (data_type_str.rfind("TYPE_", 0) != 0) { 371 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 372 | } 373 | 374 | const std::string dtype = data_type_str.substr(strlen("TYPE_")); 375 | 376 | if (dtype == "BOOL") { 377 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; 378 | } else if (dtype == "UINT8") { 379 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; 380 | } else if (dtype == "UINT16") { 381 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; 382 | } else if (dtype == "UINT32") { 383 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; 384 | } else if (dtype == "UINT64") { 385 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; 386 | } else if (dtype == "INT8") { 387 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; 388 | } else if (dtype == "INT16") { 389 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; 390 | } else if (dtype == "INT32") { 391 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; 392 | } else if (dtype == "INT64") { 393 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; 394 | } else if (dtype == "FP16") { 395 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; 396 | } else if (dtype == "FP32") { 397 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; 398 | } else if (dtype == "FP64") { 399 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; 400 | } else if (dtype == "STRING") { 401 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; 402 | } 403 | 404 | return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; 405 | } 406 | 407 | std::string 408 | OnnxDataTypeToModelConfigDataType(ONNXTensorElementDataType data_type) 409 | { 410 | if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) { 411 | return "TYPE_BOOL"; 412 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { 413 | return "TYPE_UINT8"; 414 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) { 415 | return "TYPE_UINT16"; 416 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32) { 417 | return "TYPE_UINT32"; 418 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64) { 419 | return "TYPE_UINT64"; 420 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { 421 | return "TYPE_INT8"; 422 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16) { 423 | return "TYPE_INT16"; 424 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { 425 | return "TYPE_INT32"; 426 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { 427 | return "TYPE_INT64"; 428 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { 429 | return "TYPE_FP16"; 430 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { 431 | return "TYPE_FP32"; 432 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { 433 | return "TYPE_FP64"; 434 | } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) { 435 | return "TYPE_STRING"; 436 | } 437 | 438 | return "TYPE_INVALID"; 439 | } 440 | 441 | TRITONSERVER_Error* 442 | OverridableInitializerNames(OrtSession* session, std::set& names) 443 | { 444 | return InputOutputInitializerNames(session, NameType::INITIALIZER, names); 445 | } 446 | 447 | TRITONSERVER_Error* 448 | InputNames(OrtSession* session, std::set& names) 449 | { 450 | return InputOutputInitializerNames(session, NameType::INPUT, names); 451 | } 452 | 453 | TRITONSERVER_Error* 454 | OutputNames(OrtSession* session, std::set& names) 455 | { 456 | return InputOutputInitializerNames(session, NameType::OUTPUT, names); 457 | } 458 | 459 | TRITONSERVER_Error* 460 | InputInfos( 461 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 462 | { 463 | return InputOutputInitializerInfos( 464 | 
session, allocator, NameType::INPUT, infos); 465 | } 466 | 467 | TRITONSERVER_Error* 468 | OutputInfos( 469 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 470 | { 471 | return InputOutputInitializerInfos( 472 | session, allocator, NameType::OUTPUT, infos); 473 | } 474 | 475 | TRITONSERVER_Error* 476 | OverridableInitializerInfos( 477 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos) 478 | { 479 | return InputOutputInitializerInfos( 480 | session, allocator, NameType::INITIALIZER, infos); 481 | } 482 | 483 | TRITONSERVER_Error* 484 | CompareDimsSupported( 485 | const std::string& model_name, const std::string& tensor_name, 486 | const std::vector& model_shape, const std::vector& dims, 487 | const int max_batch_size, const bool compare_exact) 488 | { 489 | // If the model configuration expects batching support in the model, 490 | // then the onnx shape first dimension must be -1. 491 | const bool supports_batching = (max_batch_size > 0); 492 | if (supports_batching) { 493 | RETURN_ERROR_IF_TRUE( 494 | (model_shape.size() == 0) || (model_shape[0] != -1), 495 | TRITONSERVER_ERROR_INVALID_ARG, 496 | std::string("model '") + model_name + "', tensor '" + tensor_name + 497 | "': for the model to support batching the shape should have at " 498 | "least 1 dimension and the first dimension must be -1; but shape " 499 | "expected by the model is " + 500 | ShapeToString(model_shape)); 501 | 502 | std::vector full_dims; 503 | full_dims.reserve(1 + dims.size()); 504 | full_dims.push_back(-1); 505 | full_dims.insert(full_dims.end(), dims.begin(), dims.end()); 506 | 507 | bool succ = (model_shape.size() == (size_t)full_dims.size()); 508 | if (succ) { 509 | for (size_t i = 0; i < full_dims.size(); ++i) { 510 | const int64_t model_dim = model_shape[i]; 511 | if (compare_exact || (model_dim != -1)) { 512 | succ &= (model_dim == full_dims[i]); 513 | } 514 | } 515 | } 516 | 517 | RETURN_ERROR_IF_TRUE( 518 | !succ, TRITONSERVER_ERROR_INVALID_ARG, 519 | std::string("model '") + model_name + "', tensor '" + tensor_name + 520 | "': the model expects " + std::to_string(model_shape.size()) + 521 | " dimensions (shape " + ShapeToString(model_shape) + 522 | ") but the model configuration specifies " + 523 | std::to_string(full_dims.size()) + 524 | " dimensions (an initial batch dimension because max_batch_size " 525 | "> 0 followed by the explicit tensor shape, making complete " 526 | "shape " + 527 | ShapeToString(full_dims) + ")"); 528 | } else { 529 | // ! 
supports_batching 530 | bool succ = (model_shape.size() == dims.size()); 531 | if (succ) { 532 | for (size_t i = 0; i < dims.size(); ++i) { 533 | const int64_t model_dim = model_shape[i]; 534 | if (compare_exact || (model_dim != -1)) { 535 | succ &= (model_dim == dims[i]); 536 | } 537 | } 538 | } 539 | 540 | RETURN_ERROR_IF_TRUE( 541 | !succ, TRITONSERVER_ERROR_INVALID_ARG, 542 | std::string("model '") + model_name + "', tensor '" + tensor_name + 543 | "': the model expects " + std::to_string(model_shape.size()) + 544 | " dimensions (shape " + ShapeToString(model_shape) + 545 | ") but the model configuration specifies " + 546 | std::to_string(dims.size()) + " dimensions (shape " + 547 | ShapeToString(dims) + ")"); 548 | } 549 | 550 | return nullptr; // success 551 | } 552 | 553 | 554 | }}} // namespace triton::backend::onnxruntime 555 | -------------------------------------------------------------------------------- /src/onnxruntime_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "triton/backend/backend_common.h" 37 | #include "triton/core/tritonserver.h" 38 | 39 | namespace triton { namespace backend { namespace onnxruntime { 40 | 41 | extern const OrtApi* ort_api; 42 | 43 | #define RESPOND_ALL_AND_SET_TRUE_IF_ORT_ERROR( \ 44 | RESPONSES, RESPONSES_COUNT, BOOL, S) \ 45 | do { \ 46 | OrtStatus* status__ = (S); \ 47 | if (status__ != nullptr) { \ 48 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 49 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 50 | ort_api->ReleaseStatus(status__); \ 51 | auto err__ = TRITONSERVER_ErrorNew( \ 52 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 53 | std::to_string(code) + ": " + msg) \ 54 | .c_str()); \ 55 | RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \ 56 | RESPONSES, RESPONSES_COUNT, BOOL, err__); \ 57 | } \ 58 | } while (false) 59 | 60 | #define RETURN_IF_ORT_ERROR(S) \ 61 | do { \ 62 | OrtStatus* status__ = (S); \ 63 | if (status__ != nullptr) { \ 64 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 65 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 66 | ort_api->ReleaseStatus(status__); \ 67 | return TRITONSERVER_ErrorNew( \ 68 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 69 | std::to_string(code) + ": " + msg) \ 70 | .c_str()); \ 71 | } \ 72 | } while (false) 73 | 74 | #define THROW_IF_BACKEND_MODEL_ORT_ERROR(S) \ 75 | do { \ 76 | OrtStatus* status__ = (S); \ 77 | if (status__ != nullptr) { \ 78 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 79 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 80 | ort_api->ReleaseStatus(status__); \ 81 | throw BackendModelException(TRITONSERVER_ErrorNew( \ 82 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 83 | std::to_string(code) + ": " + msg) \ 84 | .c_str())); \ 85 | } \ 86 | } while (false) 87 | 88 | #define THROW_IF_BACKEND_INSTANCE_ORT_ERROR(S) \ 89 | do { \ 90 | OrtStatus* status__ = (S); \ 91 | if (status__ != nullptr) { \ 92 | OrtErrorCode code = ort_api->GetErrorCode(status__); \ 93 | std::string msg = std::string(ort_api->GetErrorMessage(status__)); \ 94 | ort_api->ReleaseStatus(status__); \ 95 | throw BackendModelInstanceException(TRITONSERVER_ErrorNew( \ 96 | TRITONSERVER_ERROR_INTERNAL, (std::string("onnx runtime error ") + \ 97 | std::to_string(code) + ": " + msg) \ 98 | .c_str())); \ 99 | } \ 100 | } while (false) 101 | 102 | struct OnnxTensorInfo { 103 | OnnxTensorInfo(ONNXTensorElementDataType type, std::vector dims) 104 | : type_(type), dims_(dims) 105 | { 106 | } 107 | 108 | OnnxTensorInfo() {} 109 | 110 | ONNXTensorElementDataType type_{ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED}; 111 | std::vector dims_; 112 | }; 113 | 114 | using OnnxTensorInfoMap = std::unordered_map; 115 | 116 | /// Deleter for OrtTypeInfo. 117 | struct TypeInfoDeleter { 118 | void operator()(OrtTypeInfo* f) { ort_api->ReleaseTypeInfo(f); } 119 | }; 120 | 121 | /// Deleter for OrtSessionOptions. 
122 | struct SessionOptionsDeleter { 123 | void operator()(OrtSessionOptions* f) { ort_api->ReleaseSessionOptions(f); } 124 | }; 125 | 126 | std::string OnnxDataTypeName(ONNXTensorElementDataType onnx_type); 127 | 128 | TRITONSERVER_DataType ConvertFromOnnxDataType( 129 | ONNXTensorElementDataType onnx_type); 130 | 131 | ONNXTensorElementDataType ConvertToOnnxDataType( 132 | TRITONSERVER_DataType data_type); 133 | ONNXTensorElementDataType ConvertToOnnxDataType( 134 | const std::string& data_type_str); 135 | 136 | ONNXTensorElementDataType ModelConfigDataTypeToOnnxDataType( 137 | const std::string& data_type_str); 138 | std::string OnnxDataTypeToModelConfigDataType( 139 | ONNXTensorElementDataType data_type); 140 | 141 | TRITONSERVER_Error* OverridableInitializerNames( 142 | OrtSession* session, std::set& names); 143 | TRITONSERVER_Error* InputNames( 144 | OrtSession* session, std::set& names); 145 | TRITONSERVER_Error* OutputNames( 146 | OrtSession* session, std::set& names); 147 | 148 | TRITONSERVER_Error* OverridableInitializerInfos( 149 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 150 | TRITONSERVER_Error* InputInfos( 151 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 152 | TRITONSERVER_Error* OutputInfos( 153 | OrtSession* session, OrtAllocator* allocator, OnnxTensorInfoMap& infos); 154 | 155 | TRITONSERVER_Error* CompareDimsSupported( 156 | const std::string& model_name, const std::string& tensor_name, 157 | const std::vector& model_shape, const std::vector& dims, 158 | const int max_batch_size, const bool compare_exact); 159 | 160 | }}} // namespace triton::backend::onnxruntime 161 | -------------------------------------------------------------------------------- /test/initializer_as_input/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | This test is originated in "onnxruntime_backend" repository to better 30 | represent the scope of the test, however, this test utilizes Triton utilities 31 | and assumes that the test is located under "qa" directory in "server" repository 32 | for accessing those utilities. Please make sure the test environment is properly 33 | set before running the test. -------------------------------------------------------------------------------- /test/initializer_as_input/generate_test_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import numpy as np 28 | import onnx 29 | 30 | # Reference script on how the model used in this test is created 31 | if __name__ == "__main__": 32 | values = np.ones((5, 5)).astype(np.float32) 33 | onnx_dtype = onnx.TensorProto.FLOAT 34 | initialized_input = onnx.helper.make_tensor( 35 | name="INITIALIZER", 36 | data_type=onnx_dtype, 37 | dims=values.shape, 38 | vals=values.flatten().astype(float), 39 | ) 40 | add = onnx.helper.make_node("Add", ["INPUT", "INITIALIZER"], ["OUTPUT"]) 41 | 42 | input = onnx.helper.make_tensor_value_info("INPUT", onnx_dtype, values.shape) 43 | initializer = onnx.helper.make_tensor_value_info( 44 | "INITIALIZER", onnx_dtype, values.shape 45 | ) 46 | output = onnx.helper.make_tensor_value_info("OUTPUT", onnx_dtype, values.shape) 47 | 48 | graph_proto = onnx.helper.make_graph( 49 | [add], 50 | "init_input", 51 | [input, initializer], 52 | [output], 53 | initializer=[initialized_input], 54 | ) 55 | model_def = onnx.helper.make_model(graph_proto, producer_name="triton") 56 | onnx.save(model_def, "model.onnx") 57 | -------------------------------------------------------------------------------- /test/initializer_as_input/models/add_with_initializer/1/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/onnxruntime_backend/cf4cd89bf66ed1d7fb89c2b0930d43bcce3c5f97/test/initializer_as_input/models/add_with_initializer/1/model.onnx -------------------------------------------------------------------------------- /test/initializer_as_input/models/add_with_initializer/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | platform: "onnxruntime_onnx" 28 | max_batch_size: 0 29 | input [ 30 | { 31 | name: "INPUT" 32 | data_type: TYPE_FP32 33 | dims: [5, 5] 34 | }, 35 | { 36 | name: "INITIALIZER" 37 | data_type: TYPE_FP32 38 | dims: [5, 5] 39 | optional: true 40 | } 41 | ] 42 | output [ 43 | { 44 | name: "OUTPUT" 45 | data_type: TYPE_FP32 46 | dims: [ 5, 5] 47 | } 48 | ] -------------------------------------------------------------------------------- /test/initializer_as_input/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | import unittest 29 | 30 | import numpy as np 31 | import tritonclient.http as httpclient 32 | 33 | 34 | class OptionalInputTest(unittest.TestCase): 35 | def setUp(self): 36 | self.client_ = httpclient.InferenceServerClient("localhost:8000") 37 | self.model_name_ = "add_with_initializer" 38 | self.input_data_ = np.zeros((5, 5)).astype(np.float32) 39 | self.input_ = httpclient.InferInput("INPUT", self.input_data_.shape, "FP32") 40 | self.input_.set_data_from_numpy(self.input_data_, binary_data=False) 41 | self.optional_input_ = httpclient.InferInput( 42 | "INITIALIZER", self.input_data_.shape, "FP32" 43 | ) 44 | self.optional_input_.set_data_from_numpy(self.input_data_, binary_data=False) 45 | 46 | def test_without_optional(self): 47 | # Send request without providing optional input, the ONNX model 48 | # should use stored initializer value (tensor of all 1s) 49 | results = self.client_.infer(self.model_name_, [self.input_]) 50 | np.testing.assert_allclose(results.as_numpy("OUTPUT"), (self.input_data_ + 1)) 51 | 52 | def test_with_optional(self): 53 | # Send request with optional input provided, the ONNX model 54 | # should use provided value for the initializer 55 | results = self.client_.infer( 56 | self.model_name_, [self.input_, self.optional_input_] 57 | ) 58 | np.testing.assert_allclose( 59 | results.as_numpy("OUTPUT"), (self.input_data_ + self.input_data_) 60 | ) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /test/initializer_as_input/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
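# The run_server helper and the SERVER_PID variable used below are assumed to be provided
# by the Triton server repository's qa/common/util.sh (sourced here as ../common/util.sh),
# per the placement requirement described in this test's README.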
27 | 28 | export CUDA_VISIBLE_DEVICES=0 29 | 30 | SERVER=/opt/tritonserver/bin/tritonserver 31 | SERVER_ARGS="--model-repository=`pwd`/models" 32 | SERVER_LOG="./server.log" 33 | CLIENT_LOG="./test.log" 34 | source ../common/util.sh 35 | 36 | rm -f *.log 37 | 38 | run_server 39 | if [ "$SERVER_PID" == "0" ]; then 40 | echo -e "\n***\n*** Failed to start $SERVER\n***" 41 | cat $SERVER_LOG 42 | exit 1 43 | fi 44 | 45 | RET=0 46 | 47 | set +e 48 | 49 | python test.py >>$CLIENT_LOG 2>&1 50 | if [ $? -ne 0 ]; then 51 | cat $CLIENT_LOG 52 | echo -e "\n***\n*** Test Failed\n***" 53 | RET=1 54 | fi 55 | 56 | set -e 57 | 58 | kill $SERVER_PID 59 | wait $SERVER_PID 60 | 61 | if [ $RET -eq 0 ]; then 62 | echo -e "\n***\n*** Test Passed\n***" 63 | else 64 | echo -e "\n***\n*** Test FAILED\n***" 65 | fi 66 | 67 | exit $RET 68 | -------------------------------------------------------------------------------- /tools/gen_ort_dockerfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | import argparse 29 | import os 30 | import platform 31 | import re 32 | 33 | FLAGS = None 34 | 35 | ORT_TO_TRTPARSER_VERSION_MAP = { 36 | "1.9.0": ( 37 | "8.2", # TensorRT version 38 | "release/8.2-GA", # ONNX-Tensorrt parser version 39 | ), 40 | "1.10.0": ( 41 | "8.2", # TensorRT version 42 | "release/8.2-GA", # ONNX-Tensorrt parser version 43 | ), 44 | } 45 | 46 | OPENVINO_VERSION_MAP = { 47 | "2024.0.0": ( 48 | "2024.0", # OpenVINO short version 49 | "2024.0.0.14509.34caeefd078", # OpenVINO version with build number 50 | ), 51 | "2024.1.0": ( 52 | "2024.1", # OpenVINO short version 53 | "2024.1.0.15008.f4afc983258", # OpenVINO version with build number 54 | ), 55 | "2024.4.0": ( 56 | "2024.4", # OpenVINO short version 57 | "2024.4.0.16579.c3152d32c9c", # OpenVINO version with build number 58 | ), 59 | "2024.5.0": ( 60 | "2024.5", # OpenVINO short version 61 | "2024.5.0.17288.7975fa5da0c", # OpenVINO version with build number 62 | ), 63 | "2025.0.0": ( 64 | "2025.0", # OpenVINO short version 65 | "2025.0.0.17942.1f68be9f594", # OpenVINO version with build number 66 | ), 67 | "2025.1.0": ( 68 | "2025.1", # OpenVINO short version 69 | "2025.1.0.18503.6fec06580ab", # OpenVINO version with build number 70 | ), 71 | } 72 | 73 | 74 | def target_platform(): 75 | if FLAGS.target_platform is not None: 76 | return FLAGS.target_platform 77 | return platform.system().lower() 78 | 79 | 80 | def dockerfile_common(): 81 | df = """ 82 | ARG BASE_IMAGE={} 83 | ARG ONNXRUNTIME_VERSION={} 84 | ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime 85 | ARG ONNXRUNTIME_BUILD_CONFIG={} 86 | """.format( 87 | FLAGS.triton_container, FLAGS.ort_version, FLAGS.ort_build_config 88 | ) 89 | 90 | if FLAGS.ort_openvino is not None: 91 | df += """ 92 | ARG ONNXRUNTIME_OPENVINO_VERSION={} 93 | """.format( 94 | FLAGS.ort_openvino 95 | ) 96 | 97 | df += """ 98 | FROM ${BASE_IMAGE} 99 | WORKDIR /workspace 100 | """ 101 | return df 102 | 103 | 104 | def dockerfile_for_linux(output_file): 105 | df = dockerfile_common() 106 | df += """ 107 | # Ensure apt-get won't prompt for selecting options 108 | ENV DEBIAN_FRONTEND=noninteractive 109 | ENV PIP_BREAK_SYSTEM_PACKAGES=1 110 | 111 | # The Onnx Runtime dockerfile is the collection of steps in 112 | # https://github.com/microsoft/onnxruntime/tree/master/dockerfiles 113 | 114 | """ 115 | # Consider moving rhel logic to its own function e.g., dockerfile_for_rhel 116 | # if the changes become more substantial. 117 | if target_platform() == "rhel": 118 | df += """ 119 | # The manylinux container defaults to Python 3.7, but some feature installation 120 | # requires a higher version. 
121 | ARG PYVER=3.12 122 | ENV PYTHONPATH=/opt/python/v 123 | RUN ln -sf /opt/python/cp${PYVER/./}* ${PYTHONPATH} 124 | 125 | ENV PYBIN=${PYTHONPATH}/bin 126 | ENV PYTHON_BIN_PATH=${PYBIN}/python${PYVER} \ 127 | PATH=${PYBIN}:${PATH} 128 | 129 | RUN yum install -y \ 130 | wget \ 131 | zip \ 132 | ca-certificates \ 133 | curl \ 134 | python3-pip \ 135 | git \ 136 | gnupg \ 137 | gnupg1 \ 138 | openssl-devel 139 | 140 | RUN pip3 install patchelf==0.17.2 141 | """ 142 | else: 143 | if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"): 144 | df += """ 145 | ENV CCACHE_REMOTE_ONLY="true" \\ 146 | CCACHE_REMOTE_STORAGE="{}" \\ 147 | CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\ 148 | CMAKE_C_COMPILER_LAUNCHER="ccache" \\ 149 | CMAKE_CUDA_COMPILER_LAUNCHER="ccache" \\ 150 | VERBOSE=1 151 | 152 | RUN apt-get update \\ 153 | && apt-get install -y --no-install-recommends ccache && ccache -p \\ 154 | && rm -rf /var/lib/apt/lists/* 155 | """.format( 156 | os.getenv("CCACHE_REMOTE_STORAGE") 157 | ) 158 | 159 | df += """ 160 | 161 | RUN apt-get update && apt-get install -y --no-install-recommends \ 162 | software-properties-common \ 163 | wget \ 164 | zip \ 165 | ca-certificates \ 166 | build-essential \ 167 | curl \ 168 | libcurl4-openssl-dev \ 169 | libssl-dev \ 170 | python3-dev \ 171 | python3-pip \ 172 | git \ 173 | gnupg \ 174 | gnupg1 175 | 176 | RUN pip3 install patchelf==0.17.2 177 | 178 | # Install dependencies from 179 | # onnxruntime/dockerfiles/scripts/install_common_deps.sh. 180 | RUN apt update -q=2 \\ 181 | && apt install -y gpg wget \\ 182 | && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \\ 183 | && . /etc/os-release \\ 184 | && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \\ 185 | && apt-get update -q=2 \\ 186 | && apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3* \\ 187 | && cmake --version 188 | 189 | """ 190 | 191 | if FLAGS.ort_openvino is not None: 192 | df += """ 193 | # Install OpenVINO 194 | ARG ONNXRUNTIME_OPENVINO_VERSION 195 | ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} 196 | """ 197 | df += """ 198 | ARG OPENVINO_SHORT_VERSION={} 199 | ARG OPENVINO_VERSION_WITH_BUILD_NUMBER={} 200 | """.format( 201 | OPENVINO_VERSION_MAP[FLAGS.ort_openvino][0], 202 | OPENVINO_VERSION_MAP[FLAGS.ort_openvino][1], 203 | ) 204 | 205 | # Openvino changed the filename of the toolkit in 2025.0.0 so we need to detect this for 206 | # the release we want to install 207 | openvino_folder_name = "UNKNOWN_FOLDER_NAME" 208 | openvino_toolkit_filename = "UNKNOWN_FILENAME" 209 | if OPENVINO_VERSION_MAP[FLAGS.ort_openvino][0].split(".")[0] >= "2025": 210 | openvino_folder_name = ( 211 | "openvino_toolkit_ubuntu24_${OPENVINO_VERSION_WITH_BUILD_NUMBER}_x86_64" 212 | ) 213 | openvino_toolkit_filename = openvino_folder_name + ".tgz" 214 | else: 215 | openvino_folder_name = "l_openvino_toolkit_ubuntu24_${OPENVINO_VERSION_WITH_BUILD_NUMBER}_x86_64" 216 | openvino_toolkit_filename = openvino_folder_name + ".tgz" 217 | 218 | df += """ 219 | # Step 1: Download and install core components 220 | # Ref: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html#step-1-download-and-install-the-openvino-core-components 221 | RUN curl -L 
https://storage.openvinotoolkit.org/repositories/openvino/packages/${{OPENVINO_SHORT_VERSION}}/linux/{} --output openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 222 | tar -xf openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 223 | mkdir -p ${{INTEL_OPENVINO_DIR}} && \ 224 | mv {}/* ${{INTEL_OPENVINO_DIR}} && \ 225 | rm openvino_${{ONNXRUNTIME_OPENVINO_VERSION}}.tgz && \ 226 | (cd ${{INTEL_OPENVINO_DIR}}/install_dependencies && \ 227 | ./install_openvino_dependencies.sh -y) && \ 228 | ln -s ${{INTEL_OPENVINO_DIR}} ${{INTEL_OPENVINO_DIR}}/../openvino_`echo ${{ONNXRUNTIME_OPENVINO_VERSION}} | awk '{{print substr($0,0,4)}}'` 229 | 230 | # Step 2: Configure the environment 231 | # Ref: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment 232 | ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake 233 | ENV LD_LIBRARY_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH 234 | ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig 235 | ENV PYTHONPATH=$INTEL_OPENVINO_DIR/python/python3.12:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH 236 | """.format( 237 | openvino_toolkit_filename, openvino_folder_name 238 | ) 239 | 240 | ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. 241 | # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will 242 | # use tensorrt-8.0 branch instead of using rel-1.8.1 243 | # From ORT 1.9 onwards we will switch back to using rel-* branches 244 | if FLAGS.ort_version == "1.8.1": 245 | df += """ 246 | # 247 | # ONNX Runtime build 248 | # 249 | ARG ONNXRUNTIME_VERSION 250 | ARG ONNXRUNTIME_REPO 251 | ARG ONNXRUNTIME_BUILD_CONFIG 252 | 253 | RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 254 | (cd onnxruntime && git submodule update --init --recursive) 255 | """ 256 | # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser 257 | elif FLAGS.ort_version == "1.12.1": 258 | df += """ 259 | # 260 | # ONNX Runtime build 261 | # 262 | ARG ONNXRUNTIME_VERSION 263 | ARG ONNXRUNTIME_REPO 264 | ARG ONNXRUNTIME_BUILD_CONFIG 265 | 266 | RUN git clone -b tensorrt-8.5ea --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 267 | (cd onnxruntime && git submodule update --init --recursive) 268 | """ 269 | else: 270 | df += """ 271 | # 272 | # ONNX Runtime build 273 | # 274 | ARG ONNXRUNTIME_VERSION 275 | ARG ONNXRUNTIME_REPO 276 | ARG ONNXRUNTIME_BUILD_CONFIG 277 | 278 | RUN git clone -b rel-${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ 279 | (cd onnxruntime && git submodule update --init --recursive) 280 | """ 281 | 282 | if FLAGS.onnx_tensorrt_tag != "": 283 | df += """ 284 | RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) 285 | """.format( 286 | FLAGS.onnx_tensorrt_tag 287 | ) 288 | 289 | ep_flags = "" 290 | if FLAGS.enable_gpu: 291 | ep_flags = "--use_cuda" 292 | if FLAGS.cuda_version is not None: 293 | ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) 294 | if FLAGS.cuda_home is not None: 295 | ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) 296 | if FLAGS.cudnn_home is not None: 297 | ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) 298 | elif target_platform() == "igpu": 299 | ep_flags += ' --cudnn_home "/usr/include"' 300 | if FLAGS.ort_tensorrt: 301 | ep_flags += " --use_tensorrt" 302 | if 
FLAGS.ort_version >= "1.12.1": 303 | ep_flags += " --use_tensorrt_builtin_parser" 304 | if FLAGS.tensorrt_home is not None: 305 | ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) 306 | 307 | if os.name == "posix": 308 | if os.getuid() == 0: 309 | ep_flags += " --allow_running_as_root" 310 | 311 | if FLAGS.ort_openvino is not None: 312 | ep_flags += " --use_openvino CPU" 313 | 314 | if target_platform() == "igpu": 315 | ep_flags += ( 316 | " --skip_tests --cmake_extra_defines 'onnxruntime_BUILD_UNIT_TESTS=OFF'" 317 | ) 318 | cuda_archs = "87;101" 319 | else: 320 | cuda_archs = "75;80;86;89;90;100;120" 321 | 322 | df += """ 323 | WORKDIR /workspace/onnxruntime 324 | ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ 325 | --compile_no_warning_as_error --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' --cmake_extra_defines CMAKE_POLICY_VERSION_MINIMUM=3.5 " 326 | """.format( 327 | cuda_archs 328 | ) 329 | 330 | df += """ 331 | RUN ./build.sh ${{COMMON_BUILD_ARGS}} --update --build {} 332 | """.format( 333 | ep_flags 334 | ) 335 | 336 | df += """ 337 | # 338 | # Copy all artifacts needed by the backend to /opt/onnxruntime 339 | # 340 | WORKDIR /opt/onnxruntime 341 | 342 | RUN mkdir -p /opt/onnxruntime && \ 343 | cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ 344 | cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt 345 | 346 | # ONNX Runtime headers, libraries and binaries 347 | RUN mkdir -p /opt/onnxruntime/include && \ 348 | cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ 349 | /opt/onnxruntime/include && \ 350 | cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ 351 | /opt/onnxruntime/include && \ 352 | cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ 353 | /opt/onnxruntime/include 354 | 355 | RUN mkdir -p /opt/onnxruntime/lib && \ 356 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ 357 | /opt/onnxruntime/lib && \ 358 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ 359 | /opt/onnxruntime/lib 360 | """ 361 | if target_platform() == "igpu": 362 | df += """ 363 | RUN mkdir -p /opt/onnxruntime/bin 364 | """ 365 | else: 366 | df += """ 367 | RUN mkdir -p /opt/onnxruntime/bin && \ 368 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnxruntime_perf_test \ 369 | /opt/onnxruntime/bin && \ 370 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnx_test_runner \ 371 | /opt/onnxruntime/bin && \ 372 | (cd /opt/onnxruntime/bin && chmod a+x *) 373 | """ 374 | 375 | if FLAGS.enable_gpu: 376 | df += """ 377 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_cuda.so \ 378 | /opt/onnxruntime/lib 379 | """ 380 | 381 | if FLAGS.ort_tensorrt: 382 | df += """ 383 | # TensorRT specific headers and libraries 384 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_tensorrt.so \ 385 | /opt/onnxruntime/lib 386 | """ 387 | 388 | if FLAGS.ort_openvino is not None: 389 | df += """ 390 | # OpenVino specific headers and libraries 391 | RUN cp -r ${INTEL_OPENVINO_DIR}/docs/licensing /opt/onnxruntime/LICENSE.openvino 392 | 393 | RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h \ 394 | /opt/onnxruntime/include 395 | 396 | RUN apt-get update && apt-get install -y 
--no-install-recommends libtbb12 397 | 398 | RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_openvino.so \ 399 | /opt/onnxruntime/lib && \ 400 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 401 | /opt/onnxruntime/lib && \ 402 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 403 | /opt/onnxruntime/lib && \ 404 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_intel_cpu_plugin.so \ 405 | /opt/onnxruntime/lib && \ 406 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 407 | /opt/onnxruntime/lib && \ 408 | cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ 409 | /opt/onnxruntime/lib && \ 410 | cp /usr/lib/x86_64-linux-gnu/libtbb.so.* /opt/onnxruntime/lib 411 | 412 | RUN OV_SHORT_VERSION=`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{ split($0,a,"."); print substr(a[1],3) a[2] a[3] }'` && \ 413 | (cd /opt/onnxruntime/lib && \ 414 | chmod a-x * && \ 415 | ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so.${OV_SHORT_VERSION} && \ 416 | ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so && \ 417 | ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so.${OV_SHORT_VERSION} && \ 418 | ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so && \ 419 | ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so.${OV_SHORT_VERSION} && \ 420 | ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so && \ 421 | ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so.${OV_SHORT_VERSION} && \ 422 | ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so) 423 | """ 424 | # Linking compiled ONNX Runtime libraries to their corresponding versioned libraries 425 | df += """ 426 | RUN cd /opt/onnxruntime/lib \ 427 | && ln -s libonnxruntime.so libonnxruntime.so.1 \ 428 | && ln -s libonnxruntime.so.1 libonnxruntime.so.${ONNXRUNTIME_VERSION} 429 | """ 430 | df += """ 431 | RUN cd /opt/onnxruntime/lib && \ 432 | for i in `find . -mindepth 1 -maxdepth 1 -type f -name '*\\.so*'`; do \ 433 | patchelf --set-rpath '$ORIGIN' $i; \ 434 | done 435 | 436 | # For testing copy ONNX custom op library and model 437 | """ 438 | if target_platform() == "igpu": 439 | df += """ 440 | RUN mkdir -p /opt/onnxruntime/test 441 | """ 442 | else: 443 | df += """ 444 | RUN mkdir -p /opt/onnxruntime/test && \ 445 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libcustom_op_library.so \ 446 | /opt/onnxruntime/test && \ 447 | cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/testdata/custom_op_library/custom_op_test.onnx \ 448 | /opt/onnxruntime/test 449 | """ 450 | 451 | with open(output_file, "w") as dfile: 452 | dfile.write(df) 453 | 454 | 455 | def dockerfile_for_windows(output_file): 456 | df = dockerfile_common() 457 | 458 | ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. 459 | # For ORT versions 1.8.0 and below the behavior will remain same. 
For ORT version 1.8.1 we will 460 | # use tensorrt-8.0 branch instead of using rel-1.8.1 461 | # From ORT 1.9 onwards we will switch back to using rel-* branches 462 | if FLAGS.ort_version == "1.8.1": 463 | df += """ 464 | SHELL ["cmd", "/S", "/C"] 465 | 466 | # 467 | # ONNX Runtime build 468 | # 469 | ARG ONNXRUNTIME_VERSION 470 | ARG ONNXRUNTIME_REPO 471 | 472 | RUN git clone -b tensorrt-8.0 --recursive %ONNXRUNTIME_REPO% onnxruntime && \ 473 | (cd onnxruntime && git submodule update --init --recursive) 474 | """ 475 | else: 476 | df += """ 477 | SHELL ["cmd", "/S", "/C"] 478 | 479 | # 480 | # ONNX Runtime build 481 | # 482 | ARG ONNXRUNTIME_VERSION 483 | ARG ONNXRUNTIME_REPO 484 | RUN git clone -b rel-%ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ 485 | cd onnxruntime && git submodule update --init --recursive 486 | """ 487 | 488 | if FLAGS.onnx_tensorrt_tag != "": 489 | df += """ 490 | RUN (cd \\workspace\\onnxruntime\\cmake\\external\\onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) 491 | """.format( 492 | FLAGS.onnx_tensorrt_tag 493 | ) 494 | 495 | ep_flags = "" 496 | if FLAGS.enable_gpu: 497 | ep_flags = "--use_cuda" 498 | if FLAGS.cuda_version is not None: 499 | ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) 500 | if FLAGS.cuda_home is not None: 501 | ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) 502 | if FLAGS.cudnn_home is not None: 503 | ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) 504 | if FLAGS.ort_tensorrt: 505 | ep_flags += " --use_tensorrt" 506 | if FLAGS.tensorrt_home is not None: 507 | ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) 508 | if FLAGS.ort_openvino is not None: 509 | ep_flags += " --use_openvino CPU" 510 | 511 | df += """ 512 | WORKDIR /workspace/onnxruntime 513 | ARG VS_DEVCMD_BAT="\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat" 514 | RUN powershell Set-Content 'build.bat' -value 'call %VS_DEVCMD_BAT%',(Get-Content 'build.bat') 515 | RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;86;90;100;120" --skip_submodule_sync --parallel --build_shared_lib --compile_no_warning_as_error --skip_tests --update --build --build_dir /workspace/build {} 516 | """.format( 517 | ep_flags 518 | ) 519 | 520 | df += """ 521 | # 522 | # Copy all artifacts needed by the backend to /opt/onnxruntime 523 | # 524 | WORKDIR /opt/onnxruntime 525 | RUN copy \\workspace\\onnxruntime\\LICENSE \\opt\\onnxruntime 526 | RUN copy \\workspace\\onnxruntime\\cmake\\external\\onnx\\VERSION_NUMBER \\opt\\onnxruntime\\ort_onnx_version.txt 527 | 528 | # ONNX Runtime headers, libraries and binaries 529 | WORKDIR /opt/onnxruntime/include 530 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_c_api.h \\opt\\onnxruntime\\include 531 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_session_options_config_keys.h \\opt\\onnxruntime\\include 532 | RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\cpu\\cpu_provider_factory.h \\opt\\onnxruntime\\include 533 | 534 | WORKDIR /opt/onnxruntime/bin 535 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.dll \\opt\\onnxruntime\\bin 536 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.dll \\opt\\onnxruntime\\bin 537 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_perf_test.exe \\opt\\onnxruntime\\bin 538 | RUN copy 
\\workspace\\build\\Release\\Release\\onnx_test_runner.exe \\opt\\onnxruntime\\bin 539 | 540 | WORKDIR /opt/onnxruntime/lib 541 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.lib \\opt\\onnxruntime\\lib 542 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.lib \\opt\\onnxruntime\\lib 543 | """ 544 | 545 | if FLAGS.enable_gpu: 546 | df += """ 547 | WORKDIR /opt/onnxruntime/lib 548 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.lib \\opt\\onnxruntime\\lib 549 | WORKDIR /opt/onnxruntime/bin 550 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.dll \\opt\\onnxruntime\\bin 551 | """ 552 | 553 | if FLAGS.ort_tensorrt: 554 | df += """ 555 | # TensorRT specific headers and libraries 556 | WORKDIR /opt/onnxruntime/lib 557 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.dll \\opt\\onnxruntime\\bin 558 | 559 | WORKDIR /opt/onnxruntime/lib 560 | RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.lib \\opt\\onnxruntime\\lib 561 | """ 562 | with open(output_file, "w") as dfile: 563 | dfile.write(df) 564 | 565 | 566 | def preprocess_gpu_flags(): 567 | if target_platform() == "windows": 568 | # Default to CUDA based on CUDA_PATH envvar and TensorRT in 569 | # C:/tensorrt 570 | if "CUDA_PATH" in os.environ: 571 | if FLAGS.cuda_home is None: 572 | FLAGS.cuda_home = os.environ["CUDA_PATH"] 573 | elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: 574 | print("warning: --cuda-home does not match CUDA_PATH envvar") 575 | 576 | if FLAGS.cudnn_home is None: 577 | FLAGS.cudnn_home = FLAGS.cuda_home 578 | 579 | version = None 580 | m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) 581 | if m: 582 | version = m.group(1) 583 | 584 | if FLAGS.cuda_version is None: 585 | FLAGS.cuda_version = version 586 | elif FLAGS.cuda_version != version: 587 | print("warning: --cuda-version does not match CUDA_PATH envvar") 588 | 589 | if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): 590 | print("error: windows build requires --cuda-version and --cuda-home") 591 | 592 | if FLAGS.tensorrt_home is None: 593 | FLAGS.tensorrt_home = "/tensorrt" 594 | else: 595 | if "CUDNN_VERSION" in os.environ: 596 | if FLAGS.cudnn_home is None: 597 | FLAGS.cudnn_home = "/usr" 598 | 599 | if FLAGS.cuda_home is None: 600 | FLAGS.cuda_home = "/usr/local/cuda" 601 | 602 | if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): 603 | print("error: linux build requires --cudnn-home and --cuda-home") 604 | 605 | if FLAGS.tensorrt_home is None: 606 | if target_platform() == "rhel": 607 | if platform.machine().lower() == "aarch64": 608 | FLAGS.tensorrt_home = "/usr/local/cuda/targets/sbsa-linux/" 609 | else: 610 | FLAGS.tensorrt_home = "/usr/local/cuda/targets/x86_64-linux/" 611 | else: 612 | FLAGS.tensorrt_home = "/usr/src/tensorrt" 613 | 614 | 615 | if __name__ == "__main__": 616 | parser = argparse.ArgumentParser() 617 | 618 | parser.add_argument( 619 | "--triton-container", 620 | type=str, 621 | required=True, 622 | help="Triton base container to use for ORT build.", 623 | ) 624 | parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") 625 | parser.add_argument( 626 | "--output", type=str, required=True, help="File to write Dockerfile to." 
627 | ) 628 | parser.add_argument( 629 | "--enable-gpu", action="store_true", required=False, help="Enable GPU support" 630 | ) 631 | parser.add_argument( 632 | "--ort-build-config", 633 | type=str, 634 | default="Release", 635 | choices=["Debug", "Release", "RelWithDebInfo"], 636 | help="ORT build configuration.", 637 | ) 638 | parser.add_argument( 639 | "--target-platform", 640 | required=False, 641 | default=None, 642 | help='Target for build, can be "linux", "windows", "rhel", or "igpu". If not specified, build targets the current platform.', 643 | ) 644 | 645 | parser.add_argument( 646 | "--cuda-version", type=str, required=False, help="Version for CUDA." 647 | ) 648 | parser.add_argument( 649 | "--cuda-home", type=str, required=False, help="Home directory for CUDA." 650 | ) 651 | parser.add_argument( 652 | "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." 653 | ) 654 | parser.add_argument( 655 | "--ort-openvino", 656 | type=str, 657 | required=False, 658 | help="Enable OpenVino execution provider using specified OpenVINO version.", 659 | ) 660 | parser.add_argument( 661 | "--ort-tensorrt", 662 | action="store_true", 663 | required=False, 664 | help="Enable TensorRT execution provider.", 665 | ) 666 | parser.add_argument( 667 | "--tensorrt-home", type=str, required=False, help="Home directory for TensorRT." 668 | ) 669 | parser.add_argument( 670 | "--onnx-tensorrt-tag", type=str, default="", help="onnx-tensorrt repo tag." 671 | ) 672 | parser.add_argument("--trt-version", type=str, default="", help="TRT version.") 673 | 674 | FLAGS = parser.parse_args() 675 | if FLAGS.enable_gpu: 676 | preprocess_gpu_flags() 677 | 678 | # if a tag is provided by the user, then simply use it 679 | # if the tag is empty - check whether there is an entry in the ORT_TO_TRTPARSER_VERSION_MAP 680 | # map corresponding to ort version + trt version combo. If yes then use it 681 | # otherwise we leave it empty and use the defaults from ort 682 | if ( 683 | FLAGS.onnx_tensorrt_tag == "" 684 | and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys() 685 | ): 686 | trt_version = re.match(r"^[0-9]+\.[0-9]+", FLAGS.trt_version) 687 | if ( 688 | trt_version 689 | and trt_version.group(0) 690 | == ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][0] 691 | ): 692 | FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][1] 693 | 694 | if target_platform() == "windows": 695 | # OpenVINO EP not yet supported for windows build 696 | if FLAGS.ort_openvino is not None: 697 | print("warning: OpenVINO not supported for windows, ignoring") 698 | FLAGS.ort_openvino = None 699 | dockerfile_for_windows(FLAGS.output) 700 | else: 701 | dockerfile_for_linux(FLAGS.output) 702 | --------------------------------------------------------------------------------
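
For reference, the following is a minimal sketch of how the Dockerfile generator above might be driven end to end. The flag names are taken from the argparse definitions in tools/gen_ort_dockerfile.py; the concrete base-image tag, version numbers, output filename, and the follow-up docker build step are illustrative assumptions rather than values documented in this repository.

    #!/bin/bash
    # Hypothetical invocation of tools/gen_ort_dockerfile.py.
    # Flag names come from the script's argparse definitions; the values below
    # (container tag, ORT/OpenVINO versions, paths) are placeholders for illustration.
    python3 tools/gen_ort_dockerfile.py \
        --triton-container=nvcr.io/nvidia/tritonserver:24.01-py3-min \
        --ort-version=1.17.0 \
        --ort-build-config=Release \
        --enable-gpu \
        --cuda-home=/usr/local/cuda \
        --cudnn-home=/usr \
        --ort-tensorrt \
        --ort-openvino=2024.0.0 \
        --output=Dockerfile.ort

    # Assumed follow-up step: build an ONNX Runtime image from the generated
    # Dockerfile (the image tag is arbitrary).
    docker build -t onnxruntime-build -f Dockerfile.ort .

Note that --ort-openvino must name a key of OPENVINO_VERSION_MAP (for example 2024.0.0 above), and that on Linux preprocess_gpu_flags() falls back to /usr/local/cuda and /usr/src/tensorrt when --cuda-home and --tensorrt-home are not supplied.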