├── .clang-format ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github └── workflows │ ├── codeql.yml │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── tasks.json ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake └── TritonPythonBackendConfig.cmake.in ├── examples ├── add_sub │ ├── client.py │ ├── config.pbtxt │ └── model.py ├── auto_complete │ ├── README.md │ ├── batch_model.py │ ├── client.py │ └── nobatch_model.py ├── bls │ ├── README.md │ ├── async_client.py │ ├── async_config.pbtxt │ ├── async_model.py │ ├── sync_client.py │ ├── sync_config.pbtxt │ └── sync_model.py ├── bls_decoupled │ ├── README.md │ ├── async_client.py │ ├── async_config.pbtxt │ ├── async_model.py │ ├── sync_client.py │ ├── sync_config.pbtxt │ └── sync_model.py ├── custom_metrics │ ├── README.md │ ├── client.py │ ├── config.pbtxt │ └── model.py ├── decoupled │ ├── README.md │ ├── repeat_client.py │ ├── repeat_config.pbtxt │ ├── repeat_model.py │ ├── square_client.py │ ├── square_config.pbtxt │ └── square_model.py ├── instance_kind │ ├── README.md │ ├── client.py │ ├── config.pbtxt │ ├── model.py │ └── resnet50_labels.txt ├── jax │ ├── README.md │ ├── client.py │ ├── config.pbtxt │ └── model.py ├── preprocessing │ ├── README.md │ ├── client.py │ ├── model.py │ ├── model_repository │ │ ├── ensemble_python_resnet50 │ │ │ └── config.pbtxt │ │ ├── preprocess │ │ │ └── config.pbtxt │ │ └── resnet50_trt │ │ │ ├── config.pbtxt │ │ │ └── labels.txt │ └── onnx_exporter.py └── pytorch │ ├── client.py │ ├── config.pbtxt │ └── model.py ├── inferentia ├── README.md ├── qa │ ├── Dockerfile.QA │ └── setup_test_enviroment_and_test.sh └── scripts │ ├── gen_triton_model.py │ ├── setup-pre-container.sh │ └── setup.sh ├── pyproject.toml └── src ├── correlation_id.cc ├── correlation_id.h ├── gpu_buffers.cc ├── gpu_buffers.h ├── infer_payload.cc ├── infer_payload.h ├── infer_request.cc ├── infer_request.h ├── infer_response.cc ├── infer_response.h ├── infer_trace.cc 
├── infer_trace.h ├── ipc_message.cc ├── ipc_message.h ├── libtriton_python.ldscript ├── memory_manager.cc ├── memory_manager.h ├── message_queue.h ├── metric.cc ├── metric.h ├── metric_family.cc ├── metric_family.h ├── model_loader.cc ├── model_loader.h ├── pb_bls_cancel.cc ├── pb_bls_cancel.h ├── pb_cancel.cc ├── pb_cancel.h ├── pb_env.cc ├── pb_env.h ├── pb_error.cc ├── pb_error.h ├── pb_exception.h ├── pb_log.cc ├── pb_log.h ├── pb_map.cc ├── pb_map.h ├── pb_memory.cc ├── pb_memory.h ├── pb_metric_reporter.cc ├── pb_metric_reporter.h ├── pb_preferred_memory.h ├── pb_response_iterator.cc ├── pb_response_iterator.h ├── pb_string.cc ├── pb_string.h ├── pb_stub.cc ├── pb_stub.h ├── pb_stub_utils.cc ├── pb_stub_utils.h ├── pb_tensor.cc ├── pb_tensor.h ├── pb_utils.cc ├── pb_utils.h ├── python_be.cc ├── python_be.h ├── request_executor.cc ├── request_executor.h ├── resources └── triton_python_backend_utils.py ├── response_sender.cc ├── response_sender.h ├── scoped_defer.cc ├── scoped_defer.h ├── shm_manager.cc ├── shm_manager.h ├── shm_monitor ├── CMakeLists.txt └── shm_monitor.cc ├── stub_launcher.cc └── stub_launcher.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | 4 | IndentWidth: 2 5 | ColumnLimit: 80 6 | ContinuationIndentWidth: 4 7 | UseTab: Never 8 | MaxEmptyLinesToKeep: 2 9 | 10 | SortIncludes: true 11 | CompactNamespaces: true 12 | ReflowComments: true 13 | 14 | DerivePointerAlignment: false 15 | PointerAlignment: Left 16 | 17 | AllowShortIfStatementsOnASingleLine: false 18 | AllowShortBlocksOnASingleLine: false 19 | AllowShortFunctionsOnASingleLine: Inline 20 | 21 | AlwaysBreakAfterReturnType: TopLevelDefinitions 22 | AlignAfterOpenBracket: AlwaysBreak 23 | BreakBeforeBraces: Custom 24 | BraceWrapping: 25 | AfterClass: false 26 | AfterControlStatement: false 27 | AfterEnum: false 28 | AfterFunction: true 29 | AfterNamespace: false 30 | AfterStruct: false 31 | AfterUnion: 
false 32 | BeforeCatch: true 33 | 34 | BinPackArguments: true 35 | BinPackParameters: true 36 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 37 | 38 | IndentCaseLabels: true 39 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | FROM nvcr.io/nvidia/tritonserver:24.03-py3 28 | 29 | ARG USERNAME=triton-server 30 | 31 | # Refresh the package index and install every apt package in the same layer, 32 | # so that no later install runs against a stale cached index. 33 | RUN apt-get update \ 34 | && apt-get install -y sudo cmake rapidjson-dev 35 | 36 | RUN pip3 install transformers torch 37 | 38 | # Allow $USERNAME to run sudo without a password. 39 | # NOTE(review): the account is not created in this file; presumably it already exists in the base image - confirm. 40 | RUN echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 41 | && chmod 0440 /etc/sudoers.d/$USERNAME 42 | 43 | RUN pip3 install pre-commit ipdb 44 | 45 | RUN mkhomedir_helper ${USERNAME} 46 | 47 | USER ${USERNAME} 48 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python Backend", 3 | 4 | "build": { 5 | "dockerfile": "Dockerfile" 6 | }, 7 | "customizations": { 8 | "vscode": { 9 | "extensions": [ 10 | "ms-python.vscode-pylance", 11 | "ms-python.python", 12 | "ms-vscode.cpptools-extension-pack", 13 | "ms-vscode.cmake-tools", 14 | "github.vscode-pull-request-github" 15 | ] 16 | } 17 | }, 18 | "postCreateCommand": "sudo chown -R triton-server:triton-server ~/.cache", 19 | 20 | "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined", "--gpus=all", "--shm-size=2g", "--ulimit", "stack=67108864" ], 21 | "mounts": [ 22 | 
"source=${localEnv:HOME}/.ssh,target=/home/triton-server/.ssh,type=bind,consistency=cached", 23 | "source=${localEnv:HOME}/.cache/huggingface,target=/home/triton-server/.cache/huggingface,type=bind,consistency=cached" 24 | ], 25 | "remoteUser": "triton-server" 26 | } 27 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "CodeQL" 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | analyze: 34 | name: Analyze 35 | runs-on: ubuntu-latest 36 | permissions: 37 | actions: read 38 | contents: read 39 | security-events: write 40 | 41 | strategy: 42 | fail-fast: false 43 | matrix: 44 | language: [ 'python' ] 45 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 46 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 47 | 48 | steps: 49 | - name: Checkout repository 50 | uses: actions/checkout@v3 51 | 52 | # Initializes the CodeQL tools for scanning. 53 | - name: Initialize CodeQL 54 | uses: github/codeql-action/init@v2 55 | with: 56 | languages: ${{ matrix.language }} 57 | # If you wish to specify custom queries, you can do so here or in a config file. 58 | # By default, queries listed here will override any specified in a config file. 59 | # Prefix the list here with "+" to use these queries and those in the config file. 60 | 61 | # Details on CodeQL's query packs refer to: 62 | # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 63 | queries: +security-and-quality 64 | 65 | 66 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 
67 | # If this step fails, then you should remove it and run the build manually (see below) 68 | - name: Autobuild 69 | uses: github/codeql-action/autobuild@v2 70 | 71 | # Command-line programs to run using the OS shell. 72 | # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 73 | 74 | # If the Autobuild fails above, remove it and uncomment the following three lines. 75 | # Modify them (or add more) to build your code; please refer to the EXAMPLE below for guidance. 76 | 77 | # - run: | 78 | # echo "Run, Build Application using script" 79 | # ./location_of_script_within_repo/buildscript.sh 80 | 81 | - name: Perform CodeQL Analysis 82 | uses: github/codeql-action/analyze@v2 83 | with: 84 | category: "/language:${{matrix.language}}" 85 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: pre-commit 28 | 29 | on: 30 | pull_request: 31 | 32 | jobs: 33 | pre-commit: 34 | runs-on: ubuntu-22.04 35 | steps: 36 | - uses: actions/checkout@v3 37 | - uses: actions/setup-python@v3 38 | - uses: pre-commit/action@v3.0.0 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | *.so 3 | builddir 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | pytestdebug.log 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | doc/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # vscode 142 | .vscode/settings.json 143 | .vscode/c_cpp_properties.json 144 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | repos: 28 | - repo: https://github.com/timothycrosley/isort 29 | rev: 5.12.0 30 | hooks: 31 | - id: isort 32 | additional_dependencies: [toml] 33 | - repo: https://github.com/psf/black 34 | rev: 23.1.0 35 | hooks: 36 | - id: black 37 | types_or: [python, cython] 38 | - repo: https://github.com/PyCQA/flake8 39 | rev: 5.0.4 40 | hooks: 41 | - id: flake8 42 | args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501] 43 | types_or: [python, cython] 44 | - repo: https://github.com/pre-commit/mirrors-clang-format 45 | rev: v16.0.5 46 | hooks: 47 | - id: clang-format 48 | types_or: [c, c++, cuda, proto, textproto, java] 49 | args: ["-fallback-style=none", "-style=file", "-i"] 50 | - repo: https://github.com/codespell-project/codespell 51 | rev: v2.2.4 52 | hooks: 53 | - id: codespell 54 | additional_dependencies: [tomli] 55 | args: ["--toml", "pyproject.toml"] 56 | exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) 57 | # More details about these pre-commit hooks here: 58 | # https://pre-commit.com/hooks.html 59 | - repo: https://github.com/pre-commit/pre-commit-hooks 60 | rev: v4.4.0 61 | hooks: 62 | - id: 
check-case-conflict 63 | - id: check-executables-have-shebangs 64 | - id: check-merge-conflict 65 | - id: check-json 66 | - id: check-toml 67 | - id: check-yaml 68 | - id: check-shebang-scripts-are-executable 69 | - id: end-of-file-fixer 70 | types_or: [c, c++, cuda, proto, textproto, java, python] 71 | - id: mixed-line-ending 72 | - id: requirements-txt-fixer 73 | - id: trailing-whitespace 74 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "Configure", 6 | "type": "shell", 7 | "command": "cmake", 8 | "args": [ 9 | "-DCMAKE_INSTALL_PREFIX:STRING=/opt/tritonserver/", 10 | "-DTRITON_COMMON_REPO_TAG:STRING=main", 11 | "-DTRITON_BACKEND_REPO_TAG:STRING=main", 12 | "-DTRITON_CORE_REPO_TAG:STRING=main", 13 | "-DTRITON_ENABLE_GPU:STRING=ON", 14 | "-DTRITON_ENABLE_NVTX:STRING=ON", 15 | "-DCMAKE_INSTALL_PREFIX:STRING=${workspaceFolder}/build/install", 16 | "-DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE", 17 | "-DCMAKE_BUILD_TYPE:STRING=Debug", 18 | "-DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc", 19 | "-DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++", 20 | "-S${workspaceFolder}", 21 | "-B${workspaceFolder}/build", 22 | "-G", 23 | "Unix Makefiles" 24 | ], 25 | "problemMatcher": [] 26 | }, 27 | { 28 | "label": "Build", 29 | "type": "shell", 30 | "command": "cmake", 31 | "args": [ 32 | "--build", 33 | "${workspaceFolder}/build", 34 | "--config", 35 | "Debug", 36 | "--target", 37 | "all", 38 | "-j", 39 | "18", 40 | "--" 41 | ] 42 | }, 43 | { 44 | "label": "Install", 45 | "type": "shell", 46 | "command": "cmake", 47 | "args": [ 48 | "--build", 49 | "${workspaceFolder}/build", 50 | "--config", 51 | "Debug", 52 | "--target", 53 | "install", 54 | "-j", 55 | "18", 56 | "--" 57 | ] 58 | }, 59 | { 60 | "label": "Move", 61 | "type": "shell", 62 | "command": "sudo", 63 | "args": [ 64 | "cp", 65 | "-r", 
66 | "${workspaceFolder}/build/install/backends/python/*", 67 | "/opt/tritonserver/backends/python" 68 | ] 69 | }, 70 | { 71 | "label": "Build Python Backend", 72 | "dependsOrder": "sequence", 73 | "dependsOn": [ 74 | "Configure", 75 | "Build", 76 | "Install", 77 | "Move" 78 | ], 79 | "group": { 80 | "kind": "build", 81 | "isDefault": true 82 | } 83 | } 84 | ] 85 | } 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of NVIDIA CORPORATION nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /cmake/TritonPythonBackendConfig.cmake.in: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | include(CMakeFindDependencyMacro) 29 | 30 | get_filename_component( 31 | TRITONPYTHONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH 32 | ) 33 | 34 | list(APPEND CMAKE_MODULE_PATH ${TRITONPYTHONBACKEND_CMAKE_DIR}) 35 | 36 | if(NOT TARGET TritonPythonBackend::triton-python-backend) 37 | include("${TRITONPYTHONBACKEND_CMAKE_DIR}/TritonPythonBackendTargets.cmake") 38 | endif() 39 | 40 | set(TRITONPYTHONBACKEND_LIBRARIES TritonPythonBackend::triton-python-backend) -------------------------------------------------------------------------------- /examples/add_sub/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 
11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | 29 | import numpy as np 30 | import tritonclient.http as httpclient 31 | from tritonclient.utils import * 32 | 33 | model_name = "add_sub" 34 | shape = [4] 35 | 36 | with httpclient.InferenceServerClient("localhost:8000") as client: 37 | input0_data = np.random.rand(*shape).astype(np.float32) 38 | input1_data = np.random.rand(*shape).astype(np.float32) 39 | inputs = [ 40 | httpclient.InferInput( 41 | "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype) 42 | ), 43 | httpclient.InferInput( 44 | "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype) 45 | ), 46 | ] 47 | 48 | inputs[0].set_data_from_numpy(input0_data) 49 | inputs[1].set_data_from_numpy(input1_data) 50 | 51 | outputs = [ 52 | httpclient.InferRequestedOutput("OUTPUT0"), 53 | httpclient.InferRequestedOutput("OUTPUT1"), 54 | ] 55 | 56 | response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) 57 | 58 | result = response.get_response() 59 | output0_data = response.as_numpy("OUTPUT0") 60 | output1_data = response.as_numpy("OUTPUT1") 61 | 62 | print( 63 | "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format( 64 | input0_data, input1_data, output0_data 65 | ) 66 | ) 67 | print( 68 | "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format( 69 | input0_data, input1_data, output1_data 70 | ) 71 | ) 72 | 73 | if not np.allclose(input0_data + input1_data, output0_data): 74 | print("add_sub example error: incorrect sum") 75 | sys.exit(1) 76 | 77 | if not np.allclose(input0_data - input1_data, output1_data): 78 | print("add_sub example error: incorrect difference") 79 | sys.exit(1) 80 | 81 | print("PASS: add_sub") 82 | sys.exit(0) 83 | -------------------------------------------------------------------------------- /examples/add_sub/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | name: "add_sub" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "INPUT0" 33 | data_type: TYPE_FP32 34 | dims: [ 4 ] 35 | } 36 | ] 37 | input [ 38 | { 39 | name: "INPUT1" 40 | data_type: TYPE_FP32 41 | dims: [ 4 ] 42 | } 43 | ] 44 | output [ 45 | { 46 | name: "OUTPUT0" 47 | data_type: TYPE_FP32 48 | dims: [ 4 ] 49 | } 50 | ] 51 | output [ 52 | { 53 | name: "OUTPUT1" 54 | data_type: TYPE_FP32 55 | dims: [ 4 ] 56 | } 57 | ] 58 | 59 | instance_group [{ kind: KIND_CPU }] 60 | -------------------------------------------------------------------------------- /examples/auto_complete/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import sys 28 | 29 | import numpy as np 30 | import tritonclient.http as httpclient 31 | from tritonclient.utils import * 32 | 33 | nobatch_model_name = "nobatch_auto_complete" 34 | batch_model_name = "batch_auto_complete" 35 | # Exit with status 1 if an IO in `config` shares its name with an entry of `expected_ios` but differs in data_type or dims. 36 | # NOTE(review): an expected IO that is absent from `config` is not reported by this check. 37 | def validate_ios(config, expected_ios, model_name): 38 | for io in config: 39 | for expected_io in expected_ios: 40 | if io["name"] == expected_io["name"]: 41 | if io["data_type"] != expected_io["data_type"]: 42 | print("model '" + model_name + "' has unexpected data_type") 43 | sys.exit(1) 44 | elif io["dims"] != expected_io["dims"]: 45 | print("model '" + model_name + "' has unexpected dims") 46 | sys.exit(1) 47 | 48 | # Fetch each model's configuration from the server at localhost:8000 and compare max_batch_size, inputs and outputs with the expected values. 49 | if __name__ == "__main__": 50 | with httpclient.InferenceServerClient("localhost:8000") as client: 51 | expected_max_batch_size = { 52 | "nobatch_auto_complete": 0, 53 | "batch_auto_complete": 4, 54 | } 55 | expected_inputs = [ 56 | {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]}, 57 | {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]}, 58 | ] 59 | expected_outputs = [ 60 | {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]}, 61 | {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]}, 62 | ] 63 | 64 | models = [nobatch_model_name, batch_model_name] 65 | 66 | for model_name in models: 67 | # Validate the auto-complete model configuration 68 | model_config = client.get_model_config(model_name) 69 | if
model_config["max_batch_size"] != expected_max_batch_size[model_name]: 70 | print("model '" + model_name + "' has unexpected max_batch_size") 71 | sys.exit(1) 72 | validate_ios(model_config["input"], expected_inputs, model_name) 73 | validate_ios(model_config["output"], expected_outputs, model_name) 74 | print( 75 | "'" 76 | + model_name 77 | + "' configuration matches the expected " 78 | + "auto complete configuration\n" 79 | ) 80 | 81 | print("PASS: auto_complete") 82 | 83 | sys.exit(0) 84 | -------------------------------------------------------------------------------- /examples/bls/async_client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import sys 28 | 29 | import numpy as np 30 | import tritonclient.http as httpclient 31 | from tritonclient.utils import * 32 | # Send two random float32 vectors to the bls_async model and check OUTPUT0/OUTPUT1 against their sum and difference. 33 | model_name = "bls_async" 34 | shape = [4] 35 | 36 | with httpclient.InferenceServerClient("localhost:8000") as client: 37 | input0_data = np.random.rand(*shape).astype(np.float32) 38 | input1_data = np.random.rand(*shape).astype(np.float32) 39 | inputs = [ 40 | httpclient.InferInput( 41 | "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype) 42 | ), 43 | httpclient.InferInput( 44 | "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype) 45 | ), 46 | ] 47 | 48 | inputs[0].set_data_from_numpy(input0_data) 49 | inputs[1].set_data_from_numpy(input1_data) 50 | 51 | outputs = [ 52 | httpclient.InferRequestedOutput("OUTPUT0"), 53 | httpclient.InferRequestedOutput("OUTPUT1"), 54 | ] 55 | 56 | response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) 57 | 58 | result = response.get_response() 59 | output0_data = response.as_numpy("OUTPUT0") 60 | output1_data = response.as_numpy("OUTPUT1") 61 | 62 | print( 63 | "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format( 64 | input0_data, input1_data, output0_data 65 | ) 66 | ) 67 | print( 68 | "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format( 69 | input0_data, input1_data, output1_data 70 | ) 71 | ) 72 | # Exit with status 1 when either output deviates from the locally computed result. 73 | if not np.allclose(input0_data + input1_data, output0_data): 74 | print("BLS async
example error: incorrect sum") 75 | sys.exit(1) 76 | 77 | if not np.allclose(input0_data - input1_data, output1_data): 78 | print("BLS async example error: incorrect difference") 79 | sys.exit(1) 80 | 81 | print("PASS: BLS Async") 82 | sys.exit(0) 83 | -------------------------------------------------------------------------------- /examples/bls/async_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "bls_async" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "INPUT0" 33 | data_type: TYPE_FP32 34 | dims: [ 4 ] 35 | } 36 | ] 37 | input [ 38 | { 39 | name: "INPUT1" 40 | data_type: TYPE_FP32 41 | dims: [ 4 ] 42 | } 43 | ] 44 | output [ 45 | { 46 | name: "OUTPUT0" 47 | data_type: TYPE_FP32 48 | dims: [ 4 ] 49 | } 50 | ] 51 | output [ 52 | { 53 | name: "OUTPUT1" 54 | data_type: TYPE_FP32 55 | dims: [ 4 ] 56 | } 57 | ] 58 | 59 | instance_group [{ kind: KIND_CPU }] 60 | -------------------------------------------------------------------------------- /examples/bls/sync_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 
11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "bls_sync" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "MODEL_NAME" 33 | data_type: TYPE_STRING 34 | dims: [ 1 ] 35 | } 36 | ] 37 | input [ 38 | { 39 | name: "INPUT0" 40 | data_type: TYPE_FP32 41 | dims: [ 4 ] 42 | } 43 | ] 44 | input [ 45 | { 46 | name: "INPUT1" 47 | data_type: TYPE_FP32 48 | dims: [ 4 ] 49 | } 50 | ] 51 | output [ 52 | { 53 | name: "OUTPUT0" 54 | data_type: TYPE_FP32 55 | dims: [ 4 ] 56 | } 57 | ] 58 | output [ 59 | { 60 | name: "OUTPUT1" 61 | data_type: TYPE_FP32 62 | dims: [ 4 ] 63 | } 64 | ] 65 | 66 | instance_group [{ kind: KIND_CPU }] 67 | -------------------------------------------------------------------------------- /examples/bls_decoupled/async_client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | 29 | import numpy as np 30 | import tritonclient.http as httpclient 31 | from tritonclient.utils import * 32 | # Send each test value to the bls_decoupled_async model and check that SUM is twice the square of the input. 33 | model_name = "bls_decoupled_async" 34 | shape = [1] 35 | 36 | with httpclient.InferenceServerClient("localhost:8000") as client: 37 | in_values = [4, 2, 0, 1] 38 | 39 | for in_value in in_values: 40 | input_data = np.array([in_value], dtype=np.int32) 41 | inputs = [ 42 | httpclient.InferInput( 43 | "IN", input_data.shape, np_to_triton_dtype(input_data.dtype) 44 | ) 45 | ] 46 | inputs[0].set_data_from_numpy(input_data) 47 | outputs = [httpclient.InferRequestedOutput("SUM")] 48 | 49 | response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) 50 | 51 | result = response.get_response() 52 | # output_data is expected to be twice the square of the input value. 53 | output_data = response.as_numpy("SUM") 54 | print("==========model result==========") 55 | print( 56 | "Two times the square value of {} is {}\n".format(input_data, output_data) 57 | ) 58 | 59 | if not np.allclose((2 * input_data * input_data), output_data): 60 | print( 61 | "BLS Decoupled Async example error: incorrect output value. Expected {}, got {}.".format( 62 | (2 * input_data * input_data), output_data 63 | ) 64 | ) 65 | sys.exit(1) 66 | 67 | print("PASS: BLS Decoupled Async") 68 | sys.exit(0) 69 | -------------------------------------------------------------------------------- /examples/bls_decoupled/async_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 
8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "bls_decoupled_async" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "IN" 33 | data_type: TYPE_INT32 34 | dims: [ 1 ] 35 | } 36 | ] 37 | output [ 38 | { 39 | name: "SUM" 40 | data_type: TYPE_INT32 41 | dims: [ 1 ] 42 | } 43 | ] 44 | 45 | instance_group [{ kind: KIND_CPU }] 46 | -------------------------------------------------------------------------------- /examples/bls_decoupled/sync_client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | 29 | import numpy as np 30 | import tritonclient.http as httpclient 31 | from tritonclient.utils import * 32 | 33 | model_name = "bls_decoupled_sync" 34 | shape = [1] 35 | 36 | with httpclient.InferenceServerClient("localhost:8000") as client: 37 | in_values = [4, 2, 0, 1] 38 | 39 | for in_value in in_values: 40 | input_data = np.array([in_value], dtype=np.int32) 41 | inputs = [ 42 | httpclient.InferInput( 43 | "IN", input_data.shape, np_to_triton_dtype(input_data.dtype) 44 | ) 45 | ] 46 | inputs[0].set_data_from_numpy(input_data) 47 | outputs = [httpclient.InferRequestedOutput("SUM")] 48 | 49 | response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) 50 | 51 | result = response.get_response() 52 | output_data = response.as_numpy("SUM") 53 | print("==========model result==========") 54 | print("The square value of {} is {}\n".format(input_data, output_data)) 55 | 56 | if not np.allclose(input_data * input_data, output_data): 57 | print( 58 | "BLS Decoupled Sync example error: incorrect output value. Expected {}, got {}." 59 | ).format(input_data * input_data, output_data) 60 | sys.exit(1) 61 | 62 | print("PASS: BLS Decoupled Sync") 63 | sys.exit(0) 64 | -------------------------------------------------------------------------------- /examples/bls_decoupled/sync_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 
8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "bls_decoupled_sync" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "IN" 33 | data_type: TYPE_INT32 34 | dims: [ 1 ] 35 | } 36 | ] 37 | output [ 38 | { 39 | name: "SUM" 40 | data_type: TYPE_INT32 41 | dims: [ 1 ] 42 | } 43 | ] 44 | 45 | instance_group [{ kind: KIND_CPU }] 46 | -------------------------------------------------------------------------------- /examples/custom_metrics/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Custom Metrics Example 30 | 31 | In this section we demonstrate an end-to-end example for 32 | [Custom Metrics API](../../README.md#custom-metrics) in Python backend. 
The 33 | [model repository](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_repository.md) 34 | should contain the [custom_metrics](./model.py) model. The 35 | [custom_metrics](./model.py) model uses 36 | [Custom Metrics API](../../README.md#custom-metrics) to register and collect 37 | custom metrics. 38 | 39 | ## Deploying the Custom Metrics Models 40 | 41 | 1. Create the model repository: 42 | 43 | ```console 44 | mkdir -p models/custom_metrics/1/ 45 | 46 | # Copy the Python models 47 | cp examples/custom_metrics/model.py models/custom_metrics/1/model.py 48 | cp examples/custom_metrics/config.pbtxt models/custom_metrics/config.pbtxt 49 | ``` 50 | 51 | 2. Start the tritonserver: 52 | 53 | ``` 54 | tritonserver --model-repository `pwd`/models 55 | ``` 56 | 57 | 3. Send inference requests to the server: 58 | 59 | ``` 60 | python3 examples/custom_metrics/client.py 61 | ``` 62 | 63 | You should see an output similar to the output below in the client terminal: 64 | 65 | ``` 66 | custom_metrics example: found pattern '# HELP requests_process_latency_ns Cumulative time spent processing requests' in metrics 67 | custom_metrics example: found pattern '# TYPE requests_process_latency_ns counter' in metrics 68 | custom_metrics example: found pattern 'requests_process_latency_ns{model="custom_metrics",version="1"}' in metrics 69 | PASS: custom_metrics 70 | ``` 71 | 72 | In the terminal that runs Triton Server, you should see an output similar to 73 | the output below: 74 | ``` 75 | Cumulative requests processing latency: 223406.0 76 | ``` 77 | 78 | The [model.py](./model.py) model file is heavily commented with 79 | explanations about each of the function calls. 80 | 81 | ### Explanation of the Client Output 82 | 83 | The [client.py](./client.py) sends an HTTP request to the URL 84 | `http://localhost:8002/metrics` to fetch the metrics from the Triton server. 
The 85 | client then verifies if the custom metrics added in the model file are 86 | correctly reported. 87 | -------------------------------------------------------------------------------- /examples/custom_metrics/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | 29 | import numpy as np 30 | import requests 31 | import tritonclient.http as httpclient 32 | from tritonclient.utils import * 33 | 34 | model_name = "custom_metrics" 35 | shape = [4] 36 | 37 | 38 | def get_metrics(): 39 | metrics_url = "http://localhost:8002/metrics" 40 | r = requests.get(metrics_url) 41 | r.raise_for_status() 42 | return r.text 43 | 44 | 45 | with httpclient.InferenceServerClient("localhost:8000") as client: 46 | input0_data = np.random.rand(*shape).astype(np.float32) 47 | input1_data = np.random.rand(*shape).astype(np.float32) 48 | inputs = [ 49 | httpclient.InferInput( 50 | "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype) 51 | ), 52 | httpclient.InferInput( 53 | "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype) 54 | ), 55 | ] 56 | 57 | inputs[0].set_data_from_numpy(input0_data) 58 | inputs[1].set_data_from_numpy(input1_data) 59 | 60 | outputs = [ 61 | httpclient.InferRequestedOutput("OUTPUT0"), 62 | httpclient.InferRequestedOutput("OUTPUT1"), 63 | ] 64 | 65 | response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) 66 | 67 | output0_data = response.as_numpy("OUTPUT0") 68 | output1_data = response.as_numpy("OUTPUT1") 69 | 70 | if not np.allclose(input0_data + input1_data, output0_data): 71 | print("custom_metrics example error: incorrect sum") 72 | sys.exit(1) 73 | 74 | if not np.allclose(input0_data - input1_data, output1_data): 75 | print("custom_metrics example error: incorrect difference") 76 | sys.exit(1) 77 | 78 | metrics = get_metrics() 79 | patterns = [ 80 | "# HELP requests_process_latency_ns Cumulative time spent processing requests", 81 | "# TYPE requests_process_latency_ns counter", 82 | 'requests_process_latency_ns{model="custom_metrics",version="1"}', 83 | ] 84 | for pattern in patterns: 85 | if pattern not in metrics: 86 | print( 87 | "custom_metrics example error: missing pattern '{}' in metrics".format( 88 | pattern 89 | ) 90 | ) 91 | sys.exit(1) 
92 | else: 93 | print( 94 | "custom_metrics example: found pattern '{}' in metrics".format(pattern) 95 | ) 96 | 97 | print("PASS: custom_metrics") 98 | sys.exit(0) 99 | -------------------------------------------------------------------------------- /examples/custom_metrics/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "custom_metrics" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "INPUT0" 33 | data_type: TYPE_FP32 34 | dims: [ 4 ] 35 | } 36 | ] 37 | input [ 38 | { 39 | name: "INPUT1" 40 | data_type: TYPE_FP32 41 | dims: [ 4 ] 42 | } 43 | ] 44 | output [ 45 | { 46 | name: "OUTPUT0" 47 | data_type: TYPE_FP32 48 | dims: [ 4 ] 49 | } 50 | ] 51 | output [ 52 | { 53 | name: "OUTPUT1" 54 | data_type: TYPE_FP32 55 | dims: [ 4 ] 56 | } 57 | ] 58 | 59 | instance_group [ 60 | { 61 | count: 3 62 | kind: KIND_CPU 63 | } 64 | ] 65 | 66 | -------------------------------------------------------------------------------- /examples/decoupled/repeat_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 
11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "repeat_int32" 28 | backend: "python" 29 | max_batch_size: 0 30 | model_transaction_policy { 31 | decoupled: True 32 | } 33 | input [ 34 | { 35 | name: "IN" 36 | data_type: TYPE_INT32 37 | dims: [ -1 ] 38 | }, 39 | { 40 | name: "DELAY" 41 | data_type: TYPE_UINT32 42 | dims: [ -1 ] 43 | }, 44 | { 45 | name: "WAIT" 46 | data_type: TYPE_UINT32 47 | dims: [ 1 ] 48 | } 49 | ] 50 | output [ 51 | { 52 | name: "OUT" 53 | data_type: TYPE_INT32 54 | dims: [ 1 ] 55 | }, 56 | { 57 | name: "IDX" 58 | data_type: TYPE_UINT32 59 | dims: [ 1 ] 60 | } 61 | ] 62 | instance_group [{ kind: KIND_CPU }] 63 | -------------------------------------------------------------------------------- /examples/decoupled/square_config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | name: "square_int32" 28 | backend: "python" 29 | max_batch_size: 0 30 | model_transaction_policy { 31 | decoupled: True 32 | } 33 | input [ 34 | { 35 | name: "IN" 36 | data_type: TYPE_INT32 37 | dims: [ 1 ] 38 | } 39 | ] 40 | output [ 41 | { 42 | name: "OUT" 43 | data_type: TYPE_INT32 44 | dims: [ 1 ] 45 | } 46 | ] 47 | instance_group [{ kind: KIND_CPU }] 48 | 49 | -------------------------------------------------------------------------------- /examples/instance_kind/client.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json
import sys
import warnings

import numpy as np
import torch
import tritonclient.http as httpclient
from tritonclient.utils import *

warnings.filterwarnings("ignore")

# Example client: sends one image to the `resnet50` model over HTTP and prints
# the label whose score is highest in the returned "OUTPUT" tensor.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        type=str,
        required=False,
        default="resnet50",
        help="Model name",
    )
    parser.add_argument(
        "--image_url",
        type=str,
        required=False,
        default="http://images.cocodataset.org/test2017/000000557146.jpg",
        # Adjacent string literals instead of a backslash continuation, so the
        # source indentation does not end up inside the help text.
        help="Image URL. Default is: "
        "http://images.cocodataset.org/test2017/000000557146.jpg",
    )
    parser.add_argument(
        "--url",
        type=str,
        required=False,
        default="localhost:8000",
        help="Inference server URL. Default is localhost:8000.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "--label_file",
        type=str,
        required=False,
        default="./resnet50_labels.txt",
        help="Path to the file with text representation "
        "of available labels",
    )
    args = parser.parse_args()

    # Helper utilities fetched through torch.hub; only `prepare_input_from_uri`
    # is used below.
    utils = torch.hub.load(
        "NVIDIA/DeepLearningExamples:torchhub",
        "nvidia_convnets_processing_utils",
        skip_validation=True,
    )

    try:
        triton_client = httpclient.InferenceServerClient(args.url)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Map line index -> label text, one label per line of the label file.
    with open(args.label_file) as f:
        labels_dict = {idx: line.strip() for idx, line in enumerate(f)}

    if args.verbose:
        print(json.dumps(triton_client.get_model_config(args.model_name), indent=4))

    input_name = "INPUT"
    output_name = "OUTPUT"
    # NOTE(review): presumably downloads and preprocesses the image into a
    # batched float array matching the model input -- confirm against the
    # torch.hub utility.
    batch = np.asarray(utils.prepare_input_from_uri(args.image_url))

    # Named `infer_input` / `requested_output` so the builtin `input` is not
    # shadowed.
    infer_input = httpclient.InferInput(input_name, batch.shape, "FP32")
    requested_output = httpclient.InferRequestedOutput(output_name)

    infer_input.set_data_from_numpy(batch)
    results = triton_client.infer(
        model_name=args.model_name,
        inputs=[infer_input],
        outputs=[requested_output],
    )

    output_data = results.as_numpy(output_name)
    # Index of the largest score along axis 1, taken for the first row.
    max_id = np.argmax(output_data, axis=1)[0]
    print("Results is class: {}".format(labels_dict[max_id]))

    print("PASS: ResNet50 instance kind")
    sys.exit(0)

# --------------------------------------------------------------------------------
# /examples/instance_kind/config.pbtxt:
# --------------------------------------------------------------------------------
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | name: "resnet50" 28 | backend: "python" 29 | max_batch_size: 128 30 | input { 31 | name: "INPUT" 32 | data_type: TYPE_FP32 33 | format: FORMAT_NCHW 34 | dims: [ 3, 224, 224 ] 35 | } 36 | output { 37 | name: "OUTPUT" 38 | data_type: TYPE_FP32 39 | dims: [ 1000 ] 40 | } 41 | 42 | instance_group [{ kind: KIND_CPU }] 43 | -------------------------------------------------------------------------------- /examples/instance_kind/model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import torch
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import to_dlpack


class TritonPythonModel:
    def initialize(self, args):
        """
        Load a pre-trained ResNet50 and place it on the device selected for
        this model instance.

        The device is derived from two entries of `args`:
        `model_instance_kind` ("GPU" selects CUDA, anything else selects the
        CPU) and `model_instance_device_id`. These reflect the
        `instance_group` setting in `config.pbtxt` (KIND_CPU or KIND_GPU).
        """
        # Build the torch device string, e.g. "<cuda|cpu>:<device id>".
        runs_on_gpu = args["model_instance_kind"] == "GPU"
        device_type = "cuda" if runs_on_gpu else "cpu"
        self.device = "{}:{}".format(device_type, args["model_instance_device_id"])

        # This example is configured to work with torch=1.13 and
        # torchvision=0.14, so the hub tag `v0.14.1` is pinned to keep the
        # loaded ResNet50 compatible with the installed `torchvision`.
        # Refer to README for installation instructions.
        resnet = torch.hub.load(
            "pytorch/vision:v0.14.1",
            "resnet50",
            weights="IMAGENET1K_V2",
            skip_validation=True,
        )
        self.model = resnet.to(self.device).eval()

    def execute(self, requests):
        """
        Run the model once per request and return one
        `pb_utils.InferenceResponse` per request, in the same order.
        """

        def infer_single(request):
            # Read the "INPUT" tensor, run a gradient-free forward pass on
            # self.device, and hand the result back through DLPack as "OUTPUT".
            batch = pb_utils.get_input_tensor_by_name(request, "INPUT").as_numpy()
            with torch.no_grad():
                scores = self.model(torch.as_tensor(batch, device=self.device))
            wrapped = pb_utils.Tensor.from_dlpack("OUTPUT", to_dlpack(scores))
            return pb_utils.InferenceResponse([wrapped])

        return [infer_single(request) for request in requests]

# --------------------------------------------------------------------------------
# /examples/jax/client.py:
# --------------------------------------------------------------------------------
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import *

# Example client for the "jax" model: sends two random FP32 vectors and checks
# that OUTPUT0 is their sum and OUTPUT1 is their difference.
model_name = "jax"
shape = [4]

with httpclient.InferenceServerClient("localhost:8000") as client:
    input0_data = np.random.rand(*shape).astype(np.float32)
    input1_data = np.random.rand(*shape).astype(np.float32)
    inputs = [
        httpclient.InferInput(
            "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
        ),
        httpclient.InferInput(
            "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
        ),
    ]

    inputs[0].set_data_from_numpy(input0_data)
    inputs[1].set_data_from_numpy(input1_data)

    outputs = [
        httpclient.InferRequestedOutput("OUTPUT0"),
        httpclient.InferRequestedOutput("OUTPUT1"),
    ]

    response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)

    result = response.get_response()
    output0_data = response.as_numpy("OUTPUT0")
    output1_data = response.as_numpy("OUTPUT1")

    print(
        "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data
        )
    )
    # The difference is returned in OUTPUT1; the label used to say OUTPUT0.
    print(
        "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data
        )
    )

    # Exit non-zero on a mismatch so that callers can detect the failure.
    if not np.allclose(input0_data + input1_data, output0_data):
        print("jax example error: incorrect sum")
        sys.exit(1)

    if not np.allclose(input0_data - input1_data, output1_data):
        print("jax example error: incorrect difference")
        sys.exit(1)

    print("PASS: jax")
    sys.exit(0)

# --------------------------------------------------------------------------------
# /examples/jax/config.pbtxt:
# --------------------------------------------------------------------------------
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "jax" 28 | backend: "python" 29 | 30 | input [ 31 | { 32 | name: "INPUT0" 33 | data_type: TYPE_FP32 34 | dims: [ 4 ] 35 | } 36 | ] 37 | input [ 38 | { 39 | name: "INPUT1" 40 | data_type: TYPE_FP32 41 | dims: [ 4 ] 42 | } 43 | ] 44 | output [ 45 | { 46 | name: "OUTPUT0" 47 | data_type: TYPE_FP32 48 | dims: [ 4 ] 49 | } 50 | ] 51 | output [ 52 | { 53 | name: "OUTPUT1" 54 | data_type: TYPE_FP32 55 | dims: [ 4 ] 56 | } 57 | ] 58 | 59 | instance_group [{ kind: KIND_CPU }] 60 | -------------------------------------------------------------------------------- /examples/preprocessing/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Preprocessing Using Python Backend Example 30 | This example shows how to preprocess your inputs using Python backend before it is passed to the TensorRT model for inference. This ensemble model includes an image preprocessing model (preprocess) and a TensorRT model (resnet50_trt) to do inference. 31 | 32 | **1. Converting PyTorch Model to ONNX format:** 33 | 34 | Run onnx_exporter.py to convert ResNet50 PyTorch model to ONNX format. Width and height dims are fixed at 224 but dynamic axes arguments for dynamic batching are used. Commands from the 2. and 3. subsections shall be executed within this Docker container. 
35 | 36 | docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/pytorch:xx.yy-py3 bash 37 | pip install numpy pillow torchvision 38 | python onnx_exporter.py --save model.onnx 39 | 40 | **2. Create the model repository:** 41 | 42 | mkdir -p model_repository/ensemble_python_resnet50/1 43 | mkdir -p model_repository/preprocess/1 44 | mkdir -p model_repository/resnet50_trt/1 45 | 46 | # Copy the Python model 47 | cp model.py model_repository/preprocess/1 48 | 49 | **3. Build a TensorRT engine for the ONNX model** 50 | 51 | Pass --fp16 to enable FP16 precision. To enable dynamic shapes, use --minShapes, --optShapes, and --maxShapes together with --explicitBatch: 52 | 53 | trtexec --onnx=model.onnx --saveEngine=./model_repository/resnet50_trt/1/model.plan --explicitBatch --minShapes=input:1x3x224x224 --optShapes=input:1x3x224x224 --maxShapes=input:256x3x224x224 --fp16 54 | 55 | **4. Run the command below to start the server container:** 56 | 57 | Under python_backend/examples/preprocessing, run this command to start the server docker container: 58 | 59 | docker run --gpus=all -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd):/workspace/ -v/$(pwd)/model_repository:/models nvcr.io/nvidia/tritonserver:xx.yy-py3 bash 60 | pip install numpy pillow torchvision 61 | tritonserver --model-repository=/models 62 | 63 | **5. Start the client to test:** 64 | 65 | Under python_backend/examples/preprocessing, run the commands below to start the client Docker container: 66 | 67 | wget https://raw.githubusercontent.com/triton-inference-server/server/main/qa/images/mug.jpg -O "mug.jpg" 68 | docker run --rm --net=host -v $(pwd):/workspace/ nvcr.io/nvidia/tritonserver:xx.yy-py3-sdk python client.py --image mug.jpg 69 | Result is class: COFFEE MUG 70 | 71 | Here, the input is an image of a mug and the inference result is "COFFEE MUG", which is correct.
72 | -------------------------------------------------------------------------------- /examples/preprocessing/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import argparse
import json
import sys

import numpy as np
import tritonclient.grpc as tritongrpcclient


def load_image(img_path: str):
    """
    Read the still-encoded image file at `img_path` and return its raw
    content as a one-dimensional uint8 array.
    """
    encoded_bytes = np.fromfile(img_path, dtype="uint8")
    return encoded_bytes


# Example client: sends the encoded image bytes to the ensemble model over
# gRPC and prints the label with the highest score.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        type=str,
        required=False,
        default="ensemble_python_resnet50",
        help="Model name",
    )
    parser.add_argument("--image", type=str, required=True, help="Path to the image")
    parser.add_argument(
        "--url",
        type=str,
        required=False,
        default="localhost:8001",
        help="Inference server URL. Default is localhost:8001.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=False,
        help="Enable verbose output",
    )
    parser.add_argument(
        "--label_file",
        type=str,
        default="./model_repository/resnet50_trt/labels.txt",
        help="Path to the file with text representation of available labels",
    )
    args = parser.parse_args()

    try:
        triton_client = tritongrpcclient.InferenceServerClient(
            url=args.url, verbose=args.verbose
        )
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Line index -> label text, one label per line of the label file.
    with open(args.label_file) as label_stream:
        labels_dict = dict(enumerate(line.strip() for line in label_stream))

    input_name = "INPUT"
    output_name = "OUTPUT"

    # Prepend an axis of length 1 so the encoded bytes are sent as one batch
    # entry.
    image_data = np.expand_dims(load_image(args.image), axis=0)

    infer_input = tritongrpcclient.InferInput(input_name, image_data.shape, "UINT8")
    requested_output = tritongrpcclient.InferRequestedOutput(output_name)
    infer_input.set_data_from_numpy(image_data)

    results = triton_client.infer(
        model_name=args.model_name,
        inputs=[infer_input],
        outputs=[requested_output],
    )

    output0_data = results.as_numpy(output_name)
    print(output0_data)
    # Index of the largest score along axis 1, one entry per row.
    maxs = np.argmax(output0_data, axis=1)
    print(maxs)
    print("Result is class: {}".format(labels_dict[maxs[0]]))

# --------------------------------------------------------------------------------
# /examples/preprocessing/model_repository/ensemble_python_resnet50/config.pbtxt:
# --------------------------------------------------------------------------------
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "ensemble_python_resnet50" 28 | platform: "ensemble" 29 | max_batch_size: 256 30 | input [ 31 | { 32 | name: "INPUT" 33 | data_type: TYPE_UINT8 34 | dims: [ -1 ] 35 | } 36 | ] 37 | output [ 38 | { 39 | name: "OUTPUT" 40 | data_type: TYPE_FP32 41 | dims: [ 1000 ] 42 | } 43 | ] 44 | ensemble_scheduling { 45 | step [ 46 | { 47 | model_name: "preprocess" 48 | model_version: -1 49 | input_map { 50 | key: "INPUT_0" 51 | value: "INPUT" 52 | } 53 | output_map { 54 | key: "OUTPUT_0" 55 | value: "preprocessed_image" 56 | } 57 | }, 58 | { 59 | model_name: "resnet50_trt" 60 | model_version: -1 61 | input_map { 62 | key: "input" 63 | value: "preprocessed_image" 64 | } 65 | output_map { 66 | key: "output" 67 | value: "OUTPUT" 68 | } 69 | } 70 | ] 71 | } 72 | -------------------------------------------------------------------------------- /examples/preprocessing/model_repository/preprocess/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 
8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: "preprocess" 28 | backend: "python" 29 | max_batch_size: 256 30 | input [ 31 | { 32 | name: "INPUT_0" 33 | data_type: TYPE_UINT8 34 | dims: [ -1 ] 35 | } 36 | ] 37 | 38 | output [ 39 | { 40 | name: "OUTPUT_0" 41 | data_type: TYPE_FP32 42 | dims: [ 3, 224, 224 ] 43 | } 44 | ] 45 | 46 | instance_group [{ kind: KIND_CPU }] 47 | 48 | -------------------------------------------------------------------------------- /examples/preprocessing/model_repository/resnet50_trt/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | name: "resnet50_trt" 28 | platform: "tensorrt_plan" 29 | max_batch_size: 256 30 | input [ 31 | { 32 | name: "input" 33 | data_type: TYPE_FP32 34 | dims: [3, -1, -1 ] 35 | 36 | } 37 | ] 38 | output[ 39 | { 40 | name: "output" 41 | data_type: TYPE_FP32 42 | dims: [ 1000 ] 43 | label_filename: "labels.txt" 44 | } 45 | ] 46 | -------------------------------------------------------------------------------- /examples/preprocessing/onnx_exporter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
import argparse
import os

import torch
import torchvision.models as models

if __name__ == "__main__":
    # --save selects the path the exported ONNX file is written to.
    cli = argparse.ArgumentParser()
    cli.add_argument("--save", default="model.onnx")
    args = cli.parse_args()

    # Pretrained ResNet-50, switched to evaluation mode before export.
    model = models.resnet50(pretrained=True).eval()

    # Example input handed to the exporter: one 3x224x224 image.
    example_input = torch.randn(1, 3, 224, 224)

    # Axes left dynamic in the exported graph: batch on both tensors,
    # plus height and width on the input.
    dynamic_axes = {
        "input": {0: "batch_size", 2: "height", 3: "width"},
        "output": {0: "batch_size"},
    }

    torch.onnx.export(
        model,
        example_input,
        args.save,
        export_params=True,
        opset_version=10,
        do_constant_folding=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes=dynamic_axes,
    )

    print("Saved {}".format(args.save))
8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Client for the "pytorch" example model.

Sends two random FP32 vectors of length 4 to the model over HTTP and checks
that OUTPUT0 is their element-wise sum and OUTPUT1 their element-wise
difference. Exits with status 0 on success and 1 on a mismatch.
"""

import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

model_name = "pytorch"
shape = [4]

with httpclient.InferenceServerClient("localhost:8000") as client:
    input0_data = np.random.rand(*shape).astype(np.float32)
    input1_data = np.random.rand(*shape).astype(np.float32)

    # Describe both request tensors, then attach their payloads.
    inputs = [
        httpclient.InferInput(
            "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
        ),
        httpclient.InferInput(
            "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
        ),
    ]
    inputs[0].set_data_from_numpy(input0_data)
    inputs[1].set_data_from_numpy(input1_data)

    outputs = [
        httpclient.InferRequestedOutput("OUTPUT0"),
        httpclient.InferRequestedOutput("OUTPUT1"),
    ]

    response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)

    output0_data = response.as_numpy("OUTPUT0")
    output1_data = response.as_numpy("OUTPUT1")

    print(
        "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data
        )
    )
    # The difference is returned in OUTPUT1, so label it as such.
    print(
        "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data
        )
    )

    if not np.allclose(input0_data + input1_data, output0_data):
        print("pytorch example error: incorrect sum")
        sys.exit(1)

    if not np.allclose(input0_data - input1_data, output1_data):
        print("pytorch example error: incorrect difference")
        sys.exit(1)

    print("PASS: pytorch")
    sys.exit(0)
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Model configuration for the "pytorch" example served by the Python backend.
name: "pytorch"
backend: "python"

# Two FP32 input vectors of length 4.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]

# Two FP32 output vectors of length 4.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  },
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]

instance_group [
  {
    kind: KIND_CPU
  }
]
#
# Multistage build.
#
ARG BASE_IMAGE=tritonserver
ARG BUILD_IMAGE=tritonserver_build
ARG SDK_IMAGE=tritonserver_sdk
ARG TRITON_PATH=/home/ubuntu

# Named stages so that the COPY --from instructions below honour the
# BUILD_IMAGE and SDK_IMAGE build arguments instead of a hard-coded image.
FROM ${BUILD_IMAGE} AS build
FROM ${SDK_IMAGE} AS sdk
FROM ${BASE_IMAGE}

# An ARG declared before the first FROM is out of scope inside a build stage;
# redeclare it (without a value) to pick up the default / --build-arg value.
ARG TRITON_PATH

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

# Install platform specific packages; only Ubuntu 18.04 and 20.04 are accepted.
RUN if grep -Eq 'VERSION_ID="(18|20)\.04"' /etc/os-release; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            libpng-dev && \
        rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Ubuntu version must be either 18.04 or 20.04" && \
        exit 1; \
    fi

RUN apt-get update && apt-get install -y --no-install-recommends \
        python3-dev \
        python3-pip \
        build-essential \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Make `python` resolve to python3.
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python

RUN pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade numpy pillow attrdict future grpcio requests gsutil awscli six grpcio-channelz

WORKDIR /opt/tritonserver
# Copy the entire qa repo to the /opt/tritonserver/qa repo
COPY --from=build /workspace/qa qa
COPY --chown=1000:1000 --from=sdk /workspace/install client_tmp
# Unpack the SDK client binaries, libraries, scripts and wheels into qa/,
# then drop the temporary copy.
RUN mkdir -p qa/clients && mkdir -p qa/pkgs && \
    cp -a client_tmp/bin/* qa/clients/. && \
    cp client_tmp/lib/libgrpcclient.so qa/clients/. && \
    cp client_tmp/lib/libhttpclient.so qa/clients/. && \
    cp client_tmp/python/*.py qa/clients/. && \
    cp client_tmp/python/triton*.whl qa/pkgs/. && \
    cp client_tmp/java/examples/*.jar qa/clients/. && \
    rm -rf client_tmp

# Create mount paths for lib
RUN mkdir -p /mylib "${TRITON_PATH}"

ENV TRITON_PATH=${TRITON_PATH}
ENV LD_LIBRARY_PATH=/opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
skip = "./.git,./.github"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3

[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
# Four spaces, so wrapped imports agree with the "black" profile above.
indent = "    "
skip = ["build"]
8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "correlation_id.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | CorrelationId::CorrelationId() 32 | : id_string_(""), id_uint_(0), id_type_(CorrelationIdDataType::UINT64) 33 | { 34 | } 35 | 36 | CorrelationId::CorrelationId(const std::string& id_string) 37 | : id_string_(id_string), id_uint_(0), 38 | id_type_(CorrelationIdDataType::STRING) 39 | { 40 | } 41 | 42 | CorrelationId::CorrelationId(uint64_t id_uint) 43 | : id_string_(""), id_uint_(id_uint), id_type_(CorrelationIdDataType::UINT64) 44 | { 45 | } 46 | 47 | CorrelationId::CorrelationId(const CorrelationId& rhs) 48 | { 49 | id_uint_ = rhs.id_uint_; 50 | id_type_ = rhs.id_type_; 51 | id_string_ = rhs.id_string_; 52 | } 53 | 54 | CorrelationId::CorrelationId(std::unique_ptr& correlation_id_shm) 55 | { 56 | id_uint_ = correlation_id_shm->id_uint_; 57 | id_type_ = correlation_id_shm->id_type_; 58 | id_string_ = correlation_id_shm->id_string_; 59 | } 60 | 61 | CorrelationId& 62 | CorrelationId::operator=(const CorrelationId& rhs) 63 | { 64 | id_uint_ = rhs.id_uint_; 65 | id_type_ = rhs.id_type_; 66 | id_string_ = rhs.id_string_; 67 | return *this; 68 | } 69 | 70 | void 71 | CorrelationId::SaveToSharedMemory( 72 | std::unique_ptr& shm_pool) 73 | { 74 | AllocatedSharedMemory correlation_id_shm = 75 | shm_pool->Construct(); 76 | correlation_id_shm_ptr_ = correlation_id_shm.data_.get(); 77 | 78 | std::unique_ptr id_string_shm = 79 | PbString::Create(shm_pool, id_string_); 80 | 81 | correlation_id_shm_ptr_->id_uint = id_uint_; 82 | correlation_id_shm_ptr_->id_string_shm_handle = id_string_shm->ShmHandle(); 83 | correlation_id_shm_ptr_->id_type = id_type_; 84 | 85 | // Save the references to shared memory. 
86 | correlation_id_shm_ = std::move(correlation_id_shm); 87 | id_string_shm_ = std::move(id_string_shm); 88 | shm_handle_ = correlation_id_shm_.handle_; 89 | } 90 | 91 | std::unique_ptr 92 | CorrelationId::LoadFromSharedMemory( 93 | std::unique_ptr& shm_pool, 94 | bi::managed_external_buffer::handle_t handle) 95 | { 96 | AllocatedSharedMemory correlation_id_shm = 97 | shm_pool->Load(handle); 98 | CorrelationIdShm* correlation_id_shm_ptr = correlation_id_shm.data_.get(); 99 | 100 | std::unique_ptr id_string_shm = PbString::LoadFromSharedMemory( 101 | shm_pool, correlation_id_shm_ptr->id_string_shm_handle); 102 | 103 | return std::unique_ptr( 104 | new CorrelationId(correlation_id_shm, id_string_shm)); 105 | } 106 | 107 | CorrelationId::CorrelationId( 108 | AllocatedSharedMemory& correlation_id_shm, 109 | std::unique_ptr& id_string_shm) 110 | : correlation_id_shm_(std::move(correlation_id_shm)), 111 | id_string_shm_(std::move(id_string_shm)) 112 | { 113 | correlation_id_shm_ptr_ = correlation_id_shm_.data_.get(); 114 | shm_handle_ = correlation_id_shm_.handle_; 115 | id_string_ = id_string_shm_->String(); 116 | id_uint_ = correlation_id_shm_ptr_->id_uint; 117 | id_type_ = correlation_id_shm_ptr_->id_type; 118 | } 119 | 120 | }}}; // namespace triton::backend::python 121 | -------------------------------------------------------------------------------- /src/correlation_id.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 
8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "pb_string.h" 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | enum class CorrelationIdDataType { UINT64, STRING }; 37 | 38 | struct CorrelationIdShm { 39 | bi::managed_external_buffer::handle_t id_string_shm_handle; 40 | uint64_t id_uint; 41 | CorrelationIdDataType id_type; 42 | }; 43 | 44 | class CorrelationId { 45 | public: 46 | CorrelationId(); 47 | CorrelationId(const std::string& id_string); 48 | CorrelationId(uint64_t id_uint); 49 | CorrelationId(const CorrelationId& rhs); 50 | CorrelationId(std::unique_ptr& correlation_id_shm); 51 | CorrelationId& operator=(const CorrelationId& rhs); 52 | 53 | /// Save CorrelationId object to shared memory. 54 | /// \param shm_pool Shared memory pool to save the CorrelationId object. 55 | void SaveToSharedMemory(std::unique_ptr& shm_pool); 56 | 57 | /// Create a CorrelationId object from shared memory. 58 | /// \param shm_pool Shared memory pool 59 | /// \param handle Shared memory handle of the CorrelationId. 60 | /// \return Returns the CorrelationId in the specified handle 61 | /// location. 62 | static std::unique_ptr LoadFromSharedMemory( 63 | std::unique_ptr& shm_pool, 64 | bi::managed_external_buffer::handle_t handle); 65 | 66 | // Function that help determine exact type of Correlation Id 67 | CorrelationIdDataType Type() const { return id_type_; } 68 | 69 | // Get the value of the CorrelationId based on the type 70 | const std::string& StringValue() const { return id_string_; } 71 | uint64_t UnsignedIntValue() const { return id_uint_; } 72 | 73 | bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; } 74 | 75 | private: 76 | // The private constructor for creating a CorrelationId object from shared 77 | // memory. 
78 | CorrelationId( 79 | AllocatedSharedMemory& correlation_id_shm, 80 | std::unique_ptr& id_string_shm); 81 | 82 | std::string id_string_; 83 | uint64_t id_uint_; 84 | CorrelationIdDataType id_type_; 85 | 86 | // Shared Memory Data Structures 87 | AllocatedSharedMemory correlation_id_shm_; 88 | CorrelationIdShm* correlation_id_shm_ptr_; 89 | bi::managed_external_buffer::handle_t shm_handle_; 90 | std::unique_ptr id_string_shm_; 91 | }; 92 | 93 | }}}; // namespace triton::backend::python 94 | -------------------------------------------------------------------------------- /src/gpu_buffers.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "gpu_buffers.h" 28 | 29 | #include "pb_string.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | GPUBuffersHelper::GPUBuffersHelper() 33 | { 34 | completed_ = false; 35 | } 36 | 37 | void 38 | GPUBuffersHelper::AddBuffer(const bi::managed_external_buffer::handle_t& handle) 39 | { 40 | if (completed_) { 41 | throw PythonBackendException( 42 | "It is not possible to add buffers after 'Complete' has been called on " 43 | "a GPUBuffersHelper."); 44 | } 45 | 46 | buffers_.emplace_back(handle); 47 | } 48 | 49 | void 50 | GPUBuffersHelper::SetError( 51 | std::unique_ptr& shm_pool, const std::string& error) 52 | { 53 | error_shm_ = PbString::Create(shm_pool, error); 54 | } 55 | 56 | void 57 | GPUBuffersHelper::Complete(std::unique_ptr& shm_pool) 58 | { 59 | if (completed_) { 60 | throw PythonBackendException( 61 | "Complete has already been called. 
Complete should only be called " 62 | "once."); 63 | } 64 | gpu_buffers_shm_ = shm_pool->Construct(); 65 | if (!error_shm_) { 66 | buffers_handle_shm_ = 67 | shm_pool->Construct( 68 | buffers_.size()); 69 | gpu_buffers_shm_.data_->buffer_count = buffers_.size(); 70 | gpu_buffers_shm_.data_->success = true; 71 | gpu_buffers_shm_.data_->buffers = buffers_handle_shm_.handle_; 72 | for (size_t i = 0; i < buffers_.size(); ++i) { 73 | buffers_handle_shm_.data_.get()[i] = buffers_[i]; 74 | } 75 | } else { 76 | gpu_buffers_shm_.data_->success = false; 77 | gpu_buffers_shm_.data_->error = error_shm_->ShmHandle(); 78 | } 79 | completed_ = true; 80 | } 81 | 82 | 83 | bi::managed_external_buffer::handle_t 84 | GPUBuffersHelper::ShmHandle() 85 | { 86 | return gpu_buffers_shm_.handle_; 87 | } 88 | 89 | }}} // namespace triton::backend::python 90 | -------------------------------------------------------------------------------- /src/gpu_buffers.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include "pb_string.h" 30 | #include "pb_utils.h" 31 | #include "scoped_defer.h" 32 | 33 | namespace triton { namespace backend { namespace python { 34 | 35 | /// \param success indicating whether the process of fetching the GPU buffers 36 | /// was successful. 37 | /// \param error if success is equal to false, the error object will be set. 38 | /// \param buffers list of buffers elements. 39 | /// \param buffer_count the number of buffers. 40 | struct GPUBuffersShm { 41 | bool success; 42 | bi::managed_external_buffer::handle_t error; 43 | bi::managed_external_buffer::handle_t buffers; 44 | uint32_t buffer_count; 45 | }; 46 | 47 | /// Helper class to facilitate transfer of metadata associated 48 | /// the GPU buffers in shared memory. 
49 | class GPUBuffersHelper { 50 | public: 51 | GPUBuffersHelper(); 52 | void AddBuffer(const bi::managed_external_buffer::handle_t& handle); 53 | void Complete(std::unique_ptr& shm_pool); 54 | void SetError( 55 | std::unique_ptr& shm_pool, const std::string& error); 56 | bi::managed_external_buffer::handle_t ShmHandle(); 57 | 58 | private: 59 | AllocatedSharedMemory gpu_buffers_shm_; 60 | std::vector buffers_; 61 | AllocatedSharedMemory 62 | buffers_handle_shm_; 63 | std::unique_ptr error_shm_; 64 | bool completed_; 65 | }; 66 | 67 | }}}; // namespace triton::backend::python 68 | -------------------------------------------------------------------------------- /src/infer_payload.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "infer_payload.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | InferPayload::InferPayload( 32 | const bool is_decoupled, 33 | std::function)> callback) 34 | : is_decoupled_(is_decoupled), is_promise_set_(false), callback_(callback), 35 | request_address_(reinterpret_cast(nullptr)) /* starts as a null address, which SafeCancelRequest() treats as "nothing to cancel" */ 36 | { 37 | promise_.reset(new std::promise>()); /* SetFuture() hands out the future of this promise */ 38 | } 39 | 40 | void 41 | InferPayload::SetValue(std::unique_ptr infer_response) 42 | { 43 | { 44 | // Only the first response fulfills the promise. Every later response is passed 45 | // to the callback, after the lock is released, to send a decoupled response to the stub. 
46 | std::lock_guard lock(mutex_); 47 | if (!is_promise_set_) { 48 | is_promise_set_ = true; 49 | promise_->set_value(std::move(infer_response)); 50 | return; 51 | } 52 | } 53 | Callback(std::move(infer_response)); 54 | } 55 | 56 | void 57 | InferPayload::SetFuture( 58 | std::future>& response_future) 59 | { 60 | response_future = promise_->get_future(); /* NOTE(review): std::promise::get_future() throws std::future_error on a second call for the same promise - confirm SetFuture() runs at most once per payload. */ 61 | } 62 | 63 | bool 64 | InferPayload::IsDecoupled() 65 | { 66 | return is_decoupled_; 67 | } 68 | 69 | bool 70 | InferPayload::IsPromiseSet() 71 | { 72 | return is_promise_set_; /* NOTE(review): read without holding mutex_, while SetValue() writes the flag under mutex_ - confirm callers tolerate the unsynchronized read. */ 73 | } 74 | 75 | void 76 | InferPayload::Callback(std::unique_ptr infer_response) 77 | { 78 | return callback_(std::move(infer_response)); 79 | } 80 | 81 | void 82 | InferPayload::SetResponseAllocUserp( 83 | const ResponseAllocatorUserp& response_alloc_userp) 84 | { 85 | response_alloc_userp_ = 86 | std::make_shared(response_alloc_userp); /* stores a shared copy of the argument, not a reference to it */ 87 | } 88 | 89 | std::shared_ptr 90 | InferPayload::ResponseAllocUserp() 91 | { 92 | return response_alloc_userp_; 93 | } 94 | 95 | void 96 | InferPayload::SetRequestAddress(intptr_t request_address) 97 | { 98 | std::unique_lock lock(request_address_mutex_); 99 | request_address_ = request_address; 100 | } 101 | 102 | void 103 | InferPayload::SetRequestCancellationFunc( 104 | const std::function& request_cancel_func) 105 | { 106 | request_cancel_func_ = request_cancel_func; /* NOTE(review): assigned without request_address_mutex_, although SafeCancelRequest() reads it under that mutex - confirm it is set before any cancel can run. */ 107 | } 108 | 109 | void 110 | InferPayload::SafeCancelRequest() 111 | { 112 | std::unique_lock lock(request_address_mutex_); 113 | if (request_address_ == 0L) { /* no request address has been set, so there is nothing to cancel */ 114 | return; 115 | } 116 | 117 | if (request_cancel_func_) { 118 | request_cancel_func_(request_address_); /* invoked while request_address_mutex_ is still held */ 119 | } 120 | } 121 | 122 | }}} // namespace triton::backend::python 123 | -------------------------------------------------------------------------------- /src/infer_payload.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | 32 | #include "infer_response.h" 33 | #include "pb_preferred_memory.h" 34 | 35 | namespace triton { namespace backend { namespace python { 36 | 37 | struct ResponseAllocatorUserp { /* Pairs an opaque shm_pool pointer with a PreferredMemory value. */ 38 | ResponseAllocatorUserp( 39 | void* shm_pool, const PreferredMemory& preferred_memory) 40 | : shm_pool(shm_pool), preferred_memory(preferred_memory) 41 | { 42 | } 43 | void* shm_pool; 44 | PreferredMemory preferred_memory; 45 | }; 46 | 47 | class InferPayload : public std::enable_shared_from_this { 48 | public: 49 | InferPayload( 50 | const bool is_decouple, /* NOTE(review): spelled is_decoupled in infer_payload.cc */ 51 | std::function)> callback); 52 | 53 | /// GetPtr must only be called when the InferPayload object is owned by a 54 | /// shared pointer. Calling this function in any other circumstance is 55 | /// undefined behaviour until C++17 and throws std::bad_weak_ptr from C++17 on. 56 | std::shared_ptr GetPtr() { return shared_from_this(); } 57 | void SetValue(std::unique_ptr infer_response); /* the first call fulfills the promise; later calls go to the callback */ 58 | void SetFuture(std::future>& response_future); 59 | bool IsDecoupled(); 60 | bool IsPromiseSet(); 61 | void Callback(std::unique_ptr infer_response); 62 | void SetResponseAllocUserp( 63 | const ResponseAllocatorUserp& response_alloc_userp); 64 | std::shared_ptr ResponseAllocUserp(); 65 | void SetRequestAddress(intptr_t request_address); 66 | void SetRequestCancellationFunc( 67 | const std::function& request_cancel_func); 68 | void SafeCancelRequest(); /* does nothing while the request address is 0 or no cancellation function is set */ 69 | 70 | private: 71 | std::unique_ptr>> promise_; 72 | bool is_decoupled_; 73 | std::mutex mutex_; /* held by SetValue() while it tests and sets is_promise_set_ */ 74 | bool is_promise_set_; 75 | std::function)> callback_; 76 | std::shared_ptr response_alloc_userp_; 77 | std::mutex request_address_mutex_; /* held by SetRequestAddress() and SafeCancelRequest() */ 78 | intptr_t request_address_; /* 0 until SetRequestAddress() stores a value */ 79 | std::function request_cancel_func_; 80 | }; 81 | 82 | }}} // namespace triton::backend::python 83 | -------------------------------------------------------------------------------- /src/infer_trace.cc: -------------------------------------------------------------------------------- 1 | // 
Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "infer_trace.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | InferenceTrace::InferenceTrace(const InferenceTrace& rhs) 32 | { 33 | triton_trace_ = rhs.triton_trace_; 34 | trace_context_ = rhs.trace_context_; /* Only the trace pointer and context string are copied. NOTE(review): infer_trace_shm_ptr_ and shm_handle_ are not assigned here - confirm ShmHandle() is never read on a copy before SaveToSharedMemory(). */ 35 | } 36 | 37 | InferenceTrace& 38 | InferenceTrace::operator=(const InferenceTrace& rhs) 39 | { 40 | triton_trace_ = rhs.triton_trace_; 41 | trace_context_ = rhs.trace_context_; /* same two-field copy as the copy constructor; shared-memory members are untouched */ 42 | return *this; 43 | } 44 | 45 | InferenceTrace::InferenceTrace(std::unique_ptr& trace_shm) 46 | { 47 | triton_trace_ = trace_shm->triton_trace_; /* copies from the pointee; the argument itself is not moved from or reset */ 48 | trace_context_ = trace_shm->trace_context_; 49 | } 50 | 51 | void 52 | InferenceTrace::SaveToSharedMemory( 53 | std::unique_ptr& shm_pool) 54 | { 55 | AllocatedSharedMemory infer_trace_shm = 56 | shm_pool->Construct(); 57 | infer_trace_shm_ptr_ = infer_trace_shm.data_.get(); 58 | 59 | infer_trace_shm_ptr_->triton_trace = triton_trace_; 60 | 61 | std::unique_ptr trace_context_shm = 62 | PbString::Create(shm_pool, trace_context_); /* the context string gets its own shared-memory object; only its handle is stored in the struct */ 63 | 64 | infer_trace_shm_ptr_->trace_context_shm_handle = 65 | trace_context_shm->ShmHandle(); 66 | 67 | // Keep the shared-memory objects as members and record the handle of the struct. 
68 | trace_context_shm_ = std::move(trace_context_shm); 69 | infer_trace_shm_ = std::move(infer_trace_shm); 70 | shm_handle_ = infer_trace_shm_.handle_; 71 | } 72 | 73 | std::unique_ptr 74 | InferenceTrace::LoadFromSharedMemory( 75 | std::unique_ptr& shm_pool, 76 | bi::managed_external_buffer::handle_t handle) 77 | { 78 | AllocatedSharedMemory infer_trace_shm = 79 | shm_pool->Load(handle); 80 | InferenceTraceShm* infer_trace_shm_ptr = infer_trace_shm.data_.get(); 81 | 82 | std::unique_ptr trace_context_shm = PbString::LoadFromSharedMemory( 83 | shm_pool, infer_trace_shm_ptr->trace_context_shm_handle); /* the context string is loaded through the handle stored in the struct */ 84 | 85 | return std::unique_ptr( 86 | new InferenceTrace(infer_trace_shm, trace_context_shm)); /* the private constructor moves from both locals */ 87 | } 88 | 89 | InferenceTrace::InferenceTrace( 90 | AllocatedSharedMemory& infer_trace_shm, 91 | std::unique_ptr& trace_context_shm) 92 | : infer_trace_shm_(std::move(infer_trace_shm)), 93 | trace_context_shm_(std::move(trace_context_shm)) 94 | { 95 | infer_trace_shm_ptr_ = infer_trace_shm_.data_.get(); /* read from the members: the parameters were moved from above */ 96 | shm_handle_ = infer_trace_shm_.handle_; 97 | triton_trace_ = infer_trace_shm_ptr_->triton_trace; 98 | trace_context_ = trace_context_shm_->String(); 99 | } 100 | 101 | }}}; // namespace triton::backend::python 102 | -------------------------------------------------------------------------------- /src/infer_trace.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 
8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "pb_string.h" 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | struct InferenceTraceShm { /* Shared-memory layout written by SaveToSharedMemory() and read back by LoadFromSharedMemory(). */ 37 | bi::managed_external_buffer::handle_t trace_context_shm_handle; /* handle of the separately stored context string */ 38 | // The address of the 'TRITONSERVER_InferTrace' object. 
39 | void* triton_trace; 40 | }; 41 | 42 | // 43 | // Inference Trace: a raw trace pointer plus a context string, with helpers to save both to and load both from shared memory. 44 | // 45 | class InferenceTrace { 46 | public: 47 | InferenceTrace(void* triton_trace, const std::string& ctxt) 48 | : triton_trace_(triton_trace), trace_context_(ctxt) 49 | { 50 | } 51 | InferenceTrace() : triton_trace_(nullptr), trace_context_("") {} 52 | InferenceTrace(const InferenceTrace& rhs); 53 | InferenceTrace(std::unique_ptr& trace_shm); /* copies the trace pointer and context out of the pointee; the argument keeps ownership */ 54 | InferenceTrace& operator=(const InferenceTrace& rhs); 55 | /// Save InferenceTrace object to shared memory. 56 | /// \param shm_pool Shared memory pool to save the InferenceTrace object. 57 | void SaveToSharedMemory(std::unique_ptr& shm_pool); 58 | 59 | /// Create an InferenceTrace object from shared memory. 60 | /// \param shm_pool Shared memory pool 61 | /// \param handle Shared memory handle of the InferenceTrace. 62 | /// \return Returns the InferenceTrace in the specified handle 63 | /// location. 64 | static std::unique_ptr LoadFromSharedMemory( 65 | std::unique_ptr& shm_pool, 66 | bi::managed_external_buffer::handle_t handle); 67 | 68 | void* TritonTrace() { return triton_trace_; } 69 | const std::string& Context() const { return trace_context_; } 70 | 71 | bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; } /* NOTE(review): shm_handle_ is assigned only by SaveToSharedMemory() and the private constructor; the public constructors leave it unset. */ 72 | 73 | private: 74 | // The private constructor for creating an InferenceTrace object from shared 75 | // memory. Both arguments are moved from. 
76 | InferenceTrace( 77 | AllocatedSharedMemory& infer_trace_shm, 78 | std::unique_ptr& trace_context_shm); 79 | 80 | void* triton_trace_; 81 | std::string trace_context_; 82 | 83 | // Shared Memory Data Structures 84 | AllocatedSharedMemory infer_trace_shm_; 85 | InferenceTraceShm* infer_trace_shm_ptr_; 86 | bi::managed_external_buffer::handle_t shm_handle_; 87 | std::unique_ptr trace_context_shm_; 88 | }; 89 | 90 | }}}; // namespace triton::backend::python 91 | -------------------------------------------------------------------------------- /src/libtriton_python.ldscript: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | { 27 | global: 28 | TRITONBACKEND_*; 29 | local: *; 30 | }; 31 | -------------------------------------------------------------------------------- /src/memory_manager.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "memory_manager.h" 28 | 29 | #include "pb_utils.h" 30 | 31 | 32 | namespace triton { namespace backend { namespace python { 33 | 34 | 35 | #ifdef TRITON_ENABLE_GPU 36 | BackendMemoryRecord::BackendMemoryRecord( 37 | std::unique_ptr backend_memory) 38 | : backend_memory_(std::move(backend_memory)) 39 | { 40 | release_callback_ = [](void* ptr) { 41 | // Do nothing. The backend_memory_ will be destroyed in the destructor. 
42 | }; 43 | } 44 | 45 | void* 46 | BackendMemoryRecord::MemoryId() 47 | { 48 | return reinterpret_cast(backend_memory_->MemoryPtr()); /* the id is the pointer returned by MemoryPtr() */ 49 | } 50 | 51 | const std::function& 52 | BackendMemoryRecord::ReleaseCallback() 53 | { 54 | return release_callback_; 55 | } 56 | #endif 57 | 58 | MemoryManager::MemoryManager( 59 | std::unique_ptr>&& memory_message_queue) 60 | { 61 | message_queue_ = std::move(memory_message_queue); 62 | thread_ = std::thread(&MemoryManager::QueueMonitorThread, this); /* started after message_queue_ is assigned, because the thread pops from it right away */ 63 | } 64 | 65 | intptr_t 66 | MemoryManager::AddRecord(std::unique_ptr&& memory_record) 67 | { 68 | std::lock_guard lock{mu_}; 69 | 70 | intptr_t memory_record_id = 71 | reinterpret_cast(memory_record->MemoryId()); /* the pointer value of MemoryId() doubles as the map key */ 72 | records_.emplace(memory_record_id, std::move(memory_record)); /* NOTE(review): emplace does not replace an entry whose key already exists - confirm ids cannot repeat while a record is still stored. */ 73 | 74 | return memory_record_id; 75 | } 76 | 77 | void 78 | MemoryManager::QueueMonitorThread() 79 | { 80 | while (true) { 81 | intptr_t memory = message_queue_->Pop(); 82 | if (memory == 0) { /* a zero message ends the thread; NOTE(review): presumably DUMMY_MESSAGE, pushed by the destructor, is 0 - confirm */ 83 | return; 84 | } 85 | 86 | { 87 | std::lock_guard lock{mu_}; 88 | auto it = records_.find(memory); 89 | if (it == records_.end()) { 90 | LOG_MESSAGE( 91 | TRITONSERVER_LOG_ERROR, 92 | "Unexpected memory index received for deallocation."); 93 | continue; 94 | } 95 | 96 | // Call the release callback, then remove the record from the map. 97 | it->second->ReleaseCallback()(it->second->MemoryId()); 98 | // it->second.reset(); 99 | records_.erase(it); 100 | } 101 | } 102 | } 103 | 104 | MemoryManager::~MemoryManager() 105 | { 106 | // Push a dummy message that will trigger the destruction of the background 107 | // thread, then wait for that thread to exit. 108 | message_queue_->Push(DUMMY_MESSAGE); 109 | thread_.join(); 110 | } 111 | 112 | }}}; // namespace triton::backend::python 113 | -------------------------------------------------------------------------------- /src/memory_manager.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "message_queue.h" 35 | #include "triton/backend/backend_common.h" 36 | #include "triton/backend/backend_memory.h" 37 | #include "triton/core/tritonserver.h" 38 | 39 | #ifdef TRITON_ENABLE_GPU 40 | #include 41 | #endif // TRITON_ENABLE_GPU 42 | 43 | 44 | namespace triton { namespace backend { namespace python { 45 | 46 | class MemoryRecord { /* Interface for an entry tracked by MemoryManager: an id plus a callback that is run with that id on release. */ 47 | public: 48 | virtual const std::function& ReleaseCallback() = 0; 49 | virtual void* MemoryId() = 0; 50 | virtual ~MemoryRecord() = default; 51 | }; 52 | 53 | #ifdef TRITON_ENABLE_GPU 54 | class BackendMemoryRecord : public MemoryRecord { /* MemoryRecord that owns a backend memory object; its release callback is a no-op and the memory is freed by the destructor. */ 55 | public: 56 | BackendMemoryRecord(std::unique_ptr backend_memory); 57 | const std::function& ReleaseCallback() override; 58 | void* MemoryId() override; 59 | ~BackendMemoryRecord() { backend_memory_.reset(); } 60 | 61 | private: 62 | std::unique_ptr backend_memory_; 63 | std::function release_callback_; 64 | }; 65 | #endif 66 | 67 | /// Memory manager class is used primarily for managing the lifetime of GPU 68 | /// tensors in BLS. It mainly consists of a background thread that monitors a 69 | /// message queue in shared memory. Whenever a GPU tensor is created, it will 70 | /// be pushed to the memory manager. The stub process must send a message to the 71 | /// message queue asking the memory manager to deallocate the GPU tensor. 
72 | class MemoryManager { 73 | public: 74 | MemoryManager(std::unique_ptr>&& memory_message_queue); /* takes ownership of the queue and starts the monitor thread */ 75 | intptr_t AddRecord(std::unique_ptr&& memory_record); /* returns the key the record is stored under; the monitor thread releases the record when that value arrives on the queue */ 76 | TRITONSERVER_Error* ResetCounter(); /* NOTE(review): no definition appears in the memory_manager.cc shown alongside this header - confirm it is defined elsewhere or is dead. */ 77 | ~MemoryManager(); /* pushes DUMMY_MESSAGE and joins the monitor thread */ 78 | 79 | private: 80 | std::thread thread_; 81 | std::unordered_map> records_; 82 | std::unique_ptr> message_queue_; 83 | void QueueMonitorThread(); 84 | std::mutex mu_; /* held by AddRecord() and QueueMonitorThread() around every records_ access */ 85 | }; 86 | }}}; // namespace triton::backend::python 87 | -------------------------------------------------------------------------------- /src/pb_bls_cancel.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "pb_bls_cancel.h" 28 | 29 | #include "pb_stub.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | 33 | void 34 | PbBLSCancel::SaveToSharedMemory(std::unique_ptr& shm_pool) 35 | { 36 | cancel_shm_ = shm_pool->Construct(); 37 | new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex; /* placement-new: the interprocess mutex and condition are built in place inside the new message */ 38 | new (&(cancel_shm_.data_->cv)) bi::interprocess_condition; 39 | cancel_shm_.data_->waiting_on_stub = false; 40 | cancel_shm_.data_->infer_payload_id = infer_playload_id_; 41 | cancel_shm_.data_->is_cancelled = is_cancelled_; 42 | } 43 | 44 | bi::managed_external_buffer::handle_t 45 | PbBLSCancel::ShmHandle() 46 | { 47 | return cancel_shm_.handle_; 48 | } 49 | 50 | CancelBLSRequestMessage* 51 | PbBLSCancel::ShmPayload() 52 | { 53 | return cancel_shm_.data_.get(); 54 | } 55 | 56 | void 57 | PbBLSCancel::Cancel() 58 | { 59 | // Release the GIL. Python objects are not accessed while waiting for the cancellation. 60 | py::gil_scoped_release gil_release; 61 | 62 | std::unique_lock lk(mu_); 63 | // The cancelled flag can only move from false to true, not the other way, so 64 | // it is checked on each call until cancelled and then implicitly cached. 
65 | if (is_cancelled_) { 66 | return; 67 | } 68 | if (!updating_) { /* only a caller that finds no request in flight enqueues one; concurrent callers skip to the wait below */ 69 | std::unique_ptr& stub = Stub::GetOrCreateInstance(); 70 | if (!stub->StubToParentServiceActive()) { 71 | LOG_ERROR << "Cannot communicate with parent service"; 72 | return; /* nothing was enqueued, so there is nothing to wait for */ 73 | } 74 | 75 | stub->EnqueueCancelBLSRequest(this); 76 | updating_ = true; 77 | } 78 | cv_.wait(lk, [this] { return !updating_; }); /* released when ReportIsCancelled() clears updating_ */ 79 | } 80 | 81 | void 82 | PbBLSCancel::ReportIsCancelled(bool is_cancelled) 83 | { 84 | { 85 | std::lock_guard lk(mu_); 86 | is_cancelled_ = is_cancelled; 87 | updating_ = false; 88 | } 89 | cv_.notify_all(); /* notified after the lock is released; wakes every waiter in Cancel() */ 90 | } 91 | 92 | }}} // namespace triton::backend::python 93 | -------------------------------------------------------------------------------- /src/pb_bls_cancel.h: -------------------------------------------------------------------------------- 1 | // Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | class PbBLSCancel { /* Enqueues a cancel request through Stub::EnqueueCancelBLSRequest() and blocks the caller until ReportIsCancelled() is called. */ 37 | public: 38 | PbBLSCancel(void* infer_playload_id) /* NOTE(review): "playload" is presumably a typo for "payload"; the shared-memory field it is copied to is spelled infer_payload_id. */ 39 | : updating_(false), infer_playload_id_(infer_playload_id), 40 | is_cancelled_(false) 41 | { 42 | } 43 | DISALLOW_COPY_AND_ASSIGN(PbBLSCancel); 44 | 45 | void SaveToSharedMemory(std::unique_ptr& shm_pool); /* builds the shared-memory message from the current member values */ 46 | bi::managed_external_buffer::handle_t ShmHandle(); 47 | CancelBLSRequestMessage* ShmPayload(); 48 | 49 | void Cancel(); /* returns at once if already cancelled or if the parent service is not active */ 50 | void ReportIsCancelled(bool is_cancelled); /* stores the result and wakes every thread waiting in Cancel() */ 51 | 52 | private: 53 | AllocatedSharedMemory cancel_shm_; 54 | 55 | std::mutex mu_; /* held around updating_ and is_cancelled_ in Cancel() and ReportIsCancelled() */ 56 | std::condition_variable cv_; 57 | bool updating_; /* true while an enqueued request has not been reported back yet */ 58 | 59 | void* infer_playload_id_; 60 | bool is_cancelled_; 61 | }; 62 | 63 | }}}; // namespace triton::backend::python 64 | -------------------------------------------------------------------------------- /src/pb_cancel.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "pb_cancel.h" 28 | 29 | #include "pb_stub.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | 33 | void 34 | PbCancel::SaveToSharedMemory(std::unique_ptr& shm_pool) 35 | { 36 | cancel_shm_ = shm_pool->Construct(); 37 | new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex; /* placement-new: the interprocess mutex and condition are built in place inside the new message */ 38 | new (&(cancel_shm_.data_->cv)) bi::interprocess_condition; 39 | cancel_shm_.data_->waiting_on_stub = false; 40 | cancel_shm_.data_->response_factory_address = response_factory_address_; 41 | cancel_shm_.data_->request_address = request_address_; 42 | cancel_shm_.data_->is_cancelled = is_cancelled_; 43 | } 44 | 45 | bi::managed_external_buffer::handle_t 46 | PbCancel::ShmHandle() 47 | { 48 | return cancel_shm_.handle_; 49 | } 50 | 51 | IsCancelledMessage* 52 | PbCancel::ShmPayload() 53 | { 54 | return cancel_shm_.data_.get(); 55 | } 56 | 57 | bool 58 | PbCancel::IsCancelled() 59 | { 60 | // Release the GIL. Python objects are not accessed during the check. 61 | py::gil_scoped_release gil_release; 62 | 63 | std::unique_lock lk(mu_); 64 | // The cancelled flag can only move from false to true, not the other way, so 65 | // it is checked on each query until cancelled and then implicitly cached. 
66 | if (is_cancelled_) { 67 | return is_cancelled_; 68 | } 69 | if (!updating_) { /* only a caller that finds no query in flight enqueues one; concurrent callers skip to the wait below */ 70 | std::unique_ptr& stub = Stub::GetOrCreateInstance(); 71 | if (!stub->StubToParentServiceActive()) { 72 | LOG_ERROR << "Cannot communicate with parent service"; 73 | return false; /* nothing was enqueued; reported as not cancelled */ 74 | } 75 | stub->EnqueueIsCancelled(this); 76 | updating_ = true; 77 | } 78 | cv_.wait(lk, [this] { return !updating_; }); /* released when ReportIsCancelled() clears updating_ */ 79 | return is_cancelled_; 80 | } 81 | 82 | void 83 | PbCancel::ReportIsCancelled(bool is_cancelled) 84 | { 85 | { 86 | std::lock_guard lk(mu_); 87 | is_cancelled_ = is_cancelled; 88 | updating_ = false; 89 | } 90 | cv_.notify_all(); /* notified after the lock is released; wakes every waiter in IsCancelled() */ 91 | } 92 | 93 | }}} // namespace triton::backend::python 94 | -------------------------------------------------------------------------------- /src/pb_cancel.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | class PbCancel { 37 | public: 38 | PbCancel(intptr_t response_factory_address, intptr_t request_address) 39 | : updating_(false), response_factory_address_(response_factory_address), 40 | request_address_(request_address), is_cancelled_(false) 41 | { 42 | } 43 | DISALLOW_COPY_AND_ASSIGN(PbCancel); 44 | 45 | void SaveToSharedMemory(std::unique_ptr& shm_pool); 46 | bi::managed_external_buffer::handle_t ShmHandle(); 47 | IsCancelledMessage* ShmPayload(); 48 | 49 | bool IsCancelled(); 50 | void ReportIsCancelled(bool is_cancelled); 51 | 52 | private: 53 | AllocatedSharedMemory cancel_shm_; 54 | 55 | std::mutex mu_; 56 | std::condition_variable cv_; 57 | bool updating_; 58 | 59 | intptr_t response_factory_address_; 60 | intptr_t request_address_; 61 | bool is_cancelled_; 62 | }; 63 | 64 | }}}; // namespace triton::backend::python 65 | -------------------------------------------------------------------------------- /src/pb_env.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #ifdef WIN32 34 | #include 35 | #undef PATH_MAX 36 | #define PATH_MAX MAX_PATH 37 | #endif 38 | namespace triton { namespace backend { namespace python { 39 | 40 | void ExtractTarFile(std::string& archive_path, std::string& dst_path); 41 | 42 | bool FileExists(std::string& path); 43 | 44 | // 45 | // A class that manages Python environments 46 | // 47 | #ifndef _WIN32 48 | class EnvironmentManager { 49 | std::map> env_map_; 50 | char base_path_[PATH_MAX + 1]; 51 | std::mutex mutex_; 52 | 53 | public: 54 | EnvironmentManager(); 55 | 56 | // Extracts the tar.gz file in the 'env_path' if it has not been 57 | // already extracted. 58 | std::string ExtractIfNotExtracted(std::string env_path); 59 | ~EnvironmentManager(); 60 | }; 61 | #endif 62 | 63 | }}} // namespace triton::backend::python 64 | -------------------------------------------------------------------------------- /src/pb_error.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "pb_error.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | TRITONSERVER_Error_Code 32 | PbError::Code() 33 | { 34 | return code_; 35 | } 36 | 37 | const std::string& 38 | PbError::Message() 39 | { 40 | return message_; 41 | } 42 | 43 | bi::managed_external_buffer::handle_t 44 | PbError::ShmHandle() 45 | { 46 | return shm_handle_; 47 | } 48 | 49 | void 50 | PbError::SaveToSharedMemory(std::unique_ptr& shm_pool) 51 | { 52 | message_shm_ = PbString::Create(shm_pool, message_); 53 | error_shm_ = shm_pool->Construct(); 54 | error_shm_.data_->code = code_; 55 | error_shm_.data_->message_shm_handle = message_shm_->ShmHandle(); 56 | shm_handle_ = error_shm_.handle_; 57 | } 58 | 59 | std::shared_ptr 60 | PbError::LoadFromSharedMemory( 61 | std::unique_ptr& shm_pool, 62 | bi::managed_external_buffer::handle_t shm_handle) 63 | { 64 | AllocatedSharedMemory error_shm = 65 | shm_pool->Load(shm_handle); 66 | std::unique_ptr message_shm = PbString::LoadFromSharedMemory( 67 | shm_pool, error_shm.data_->message_shm_handle); 68 | 69 | TRITONSERVER_Error_Code code = error_shm.data_->code; 70 | std::string message = 
message_shm->String(); 71 | 72 | return std::shared_ptr(new PbError( 73 | std::move(message_shm), std::move(error_shm), code, std::move(message))); 74 | } 75 | 76 | PbError::PbError( 77 | std::shared_ptr&& message_shm, 78 | AllocatedSharedMemory&& error_shm, TRITONSERVER_Error_Code code, 79 | std::string&& message) 80 | : message_shm_(std::move(message_shm)), error_shm_(std::move(error_shm)), 81 | code_(code), message_(std::move(message)) 82 | { 83 | } 84 | 85 | }}} // namespace triton::backend::python 86 | -------------------------------------------------------------------------------- /src/pb_error.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "pb_string.h" 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | struct PbErrorShm { 37 | TRITONSERVER_Error_Code code; 38 | bi::managed_external_buffer::handle_t message_shm_handle; 39 | }; 40 | 41 | class PbError { 42 | public: 43 | PbError( 44 | const std::string& message, 45 | TRITONSERVER_Error_Code code = TRITONSERVER_ERROR_INTERNAL) 46 | : code_(code), message_(message) 47 | { 48 | } 49 | DISALLOW_COPY_AND_ASSIGN(PbError); 50 | 51 | TRITONSERVER_Error_Code Code(); 52 | const std::string& Message(); 53 | 54 | void SaveToSharedMemory(std::unique_ptr& shm_pool); 55 | bi::managed_external_buffer::handle_t ShmHandle(); 56 | 57 | static std::shared_ptr LoadFromSharedMemory( 58 | std::unique_ptr& shm_pool, 59 | bi::managed_external_buffer::handle_t handle); 60 | 61 | private: 62 | PbError( 63 | std::shared_ptr&& message_shm, 64 | AllocatedSharedMemory&& error_shm, 65 | TRITONSERVER_Error_Code code, std::string&& message); 66 | 67 | std::shared_ptr message_shm_; 68 | AllocatedSharedMemory error_shm_; 69 | bi::managed_external_buffer::handle_t shm_handle_; 70 | 71 | TRITONSERVER_Error_Code code_; 72 | std::string message_; 73 | }; 74 | 75 | }}}; // namespace triton::backend::python 76 | -------------------------------------------------------------------------------- /src/pb_exception.h: 
--------------------------------------------------------------------------------
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#pragma once

#include <exception>
#include <string>

namespace triton { namespace backend { namespace python {

//
// PythonBackendException
//
// Exception thrown if error occurs in PythonBackend.
//
struct PythonBackendException : std::exception {
  PythonBackendException(const std::string& message) : message_(message) {}

  // Returns the stored message. The pointer stays valid for as long as this
  // exception object (and its 'message_') is alive and unmodified.
  // 'noexcept override' replaces the dynamic exception specification
  // 'throw()', which is deprecated since C++17 and removed in C++20.
  const char* what() const noexcept override { return message_.c_str(); }

  std::string message_;
};

}}}  // namespace triton::backend::python

--------------------------------------------------------------------------------
/src/pb_log.cc:
--------------------------------------------------------------------------------
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "pb_log.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | PbLog::PbLog( 32 | const std::string& filename, uint32_t line, const std::string& message, 33 | LogLevel level) 34 | : filename_(filename), line_(line), message_(message), level_(level) 35 | { 36 | } 37 | 38 | const std::string& 39 | PbLog::Filename() 40 | { 41 | return filename_; 42 | } 43 | const std::string& 44 | PbLog::Message() 45 | { 46 | return message_; 47 | } 48 | const LogLevel& 49 | PbLog::Level() 50 | { 51 | return level_; 52 | } 53 | const uint32_t& 54 | PbLog::Line() 55 | { 56 | return line_; 57 | } 58 | 59 | PbLogShm::PbLogShm( 60 | AllocatedSharedMemory& log_container_shm, 61 | std::unique_ptr& filename, std::unique_ptr& message) 62 | : log_container_shm_(std::move(log_container_shm)), 63 | filename_pb_string_(std::move(filename)), 64 | message_pb_string_(std::move(message)) 65 | { 66 | log_container_shm_ptr_ = log_container_shm_.data_.get(); 67 | log_container_shm_ptr_->filename = filename_pb_string_->ShmHandle(); 68 | log_container_shm_ptr_->log_message = message_pb_string_->ShmHandle(); 69 | } 70 | 71 | std::unique_ptr 72 | PbLogShm::Create( 73 | std::unique_ptr& shm_pool, const std::string& filename, 74 | const uint32_t& line, const std::string& message, const LogLevel& level) 75 | { 76 | std::unique_ptr file_name = PbString::Create(shm_pool, filename); 77 | std::unique_ptr 
log_message = PbString::Create(shm_pool, message); 78 | AllocatedSharedMemory log_send_message = 79 | shm_pool->Construct(); 80 | 81 | LogSendMessage* send_message_payload = log_send_message.data_.get(); 82 | new (&(send_message_payload->mu)) bi::interprocess_mutex; 83 | new (&(send_message_payload->cv)) bi::interprocess_condition; 84 | send_message_payload->line = line; 85 | send_message_payload->level = level; 86 | 87 | return std::unique_ptr( 88 | new PbLogShm(log_send_message, file_name, log_message)); 89 | } 90 | 91 | std::unique_ptr 92 | PbLogShm::LoadFromSharedMemory( 93 | std::unique_ptr& shm_pool, 94 | bi::managed_external_buffer::handle_t handle) 95 | { 96 | AllocatedSharedMemory log_container_shm = 97 | shm_pool->Load(handle); 98 | std::unique_ptr pb_string_filename = PbString::LoadFromSharedMemory( 99 | shm_pool, log_container_shm.data_->filename); 100 | const std::string& filename = pb_string_filename->String(); 101 | uint32_t line = log_container_shm.data_->line; 102 | std::unique_ptr pb_string_msg = PbString::LoadFromSharedMemory( 103 | shm_pool, log_container_shm.data_->log_message); 104 | const std::string& message = pb_string_msg->String(); 105 | LogLevel level = log_container_shm.data_->level; 106 | return std::unique_ptr(new PbLog(filename, line, message, level)); 107 | } 108 | 109 | bi::managed_external_buffer::handle_t 110 | PbLogShm::ShmHandle() 111 | { 112 | return log_container_shm_.handle_; 113 | } 114 | 115 | LogSendMessage* 116 | PbLogShm::LogMessage() 117 | { 118 | return log_container_shm_ptr_; 119 | } 120 | 121 | }}} // namespace triton::backend::python 122 | -------------------------------------------------------------------------------- /src/pb_log.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "pb_string.h" 32 | #include "pb_utils.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | class PbLog { 36 | public: 37 | /// Create a PbLog instance 38 | PbLog( 39 | const std::string& filename, uint32_t line, const std::string& message, 40 | LogLevel level); 41 | 42 | /// Get the filename where the log was recorded 43 | const std::string& Filename(); 44 | 45 | /// Get the log message 46 | const std::string& Message(); 47 | 48 | /// Get the log level of the message 49 | const LogLevel& Level(); 50 | 51 | /// Get the line number of the log message 52 | const uint32_t& Line(); 53 | 54 | private: 55 | std::string filename_; 56 | uint32_t line_; 57 | std::string message_; 58 | LogLevel level_; 59 | }; 60 | 61 | class PbLogShm { 62 | public: 63 | /// Save PbLog object to shared memory 64 | static std::unique_ptr Create( 65 | std::unique_ptr& shm_pool, 66 | const std::string& filename, const uint32_t& line, 67 | const std::string& message, const LogLevel& level); 68 | 69 | /// Load PbLog object to shared memory 70 | static std::unique_ptr LoadFromSharedMemory( 71 | std::unique_ptr& shm_pool, 72 | bi::managed_external_buffer::handle_t handle); 73 | 74 | /// Get the shared memory handle of the saved log message 75 | bi::managed_external_buffer::handle_t ShmHandle(); 76 | 77 | /// Get a pointer to the saved log message 78 | LogSendMessage* LogMessage(); 79 | 80 | private: 81 | AllocatedSharedMemory log_container_shm_; 82 | std::unique_ptr filename_pb_string_; 83 | std::unique_ptr message_pb_string_; 84 | 85 | LogSendMessage* log_container_shm_ptr_; 86 | 87 | PbLogShm( 88 | AllocatedSharedMemory& log_container_shm, 89 | std::unique_ptr& filename, std::unique_ptr& message); 90 | }; 91 | }}}; // namespace triton::backend::python 92 | -------------------------------------------------------------------------------- /src/pb_map.cc: 
-------------------------------------------------------------------------------- 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #include "pb_map.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | std::unique_ptr 32 | PbMap::Create( 33 | std::unique_ptr& shm_pool, 34 | std::unordered_map& map) 35 | { 36 | std::vector> strings; 37 | AllocatedSharedMemory dict_shm = shm_pool->Construct(); 38 | dict_shm.data_->length = map.size(); 39 | 40 | AllocatedSharedMemory pair_shms = 41 | shm_pool->Construct(map.size()); 42 | dict_shm.data_->values = pair_shms.handle_; 43 | 44 | size_t i = 0; 45 | for (auto& pair : map) { 46 | auto key = PbString::Create(shm_pool, pair.first); 47 | auto value = PbString::Create(shm_pool, pair.second); 48 | 49 | (pair_shms.data_.get())[i].key = key->ShmHandle(); 50 | (pair_shms.data_.get())[i].value = value->ShmHandle(); 51 | 52 | strings.emplace_back(std::move(key)); 53 | strings.emplace_back(std::move(value)); 54 | i++; 55 | } 56 | 57 | return std::unique_ptr(new PbMap(strings, dict_shm, pair_shms, map)); 58 | } 59 | 60 | const std::unordered_map& 61 | PbMap::UnorderedMap() 62 | { 63 | return map_; 64 | } 65 | 66 | bi::managed_external_buffer::handle_t 67 | PbMap::ShmHandle() 68 | { 69 | return dict_handle_; 70 | } 71 | 72 | std::unique_ptr 73 | PbMap::LoadFromSharedMemory( 74 | std::unique_ptr& shm_pool, 75 | bi::managed_external_buffer::handle_t handle) 76 | { 77 | AllocatedSharedMemory dict_shm = shm_pool->Load(handle); 78 | AllocatedSharedMemory pair_shms = 79 | shm_pool->Load(dict_shm.data_->values); 80 | 81 | std::vector> pb_strings; 82 | std::unordered_map map; 83 | for (size_t i = 0; i < dict_shm.data_->length; i++) { 84 | std::unique_ptr key = PbString::LoadFromSharedMemory( 85 | shm_pool, (pair_shms.data_.get())[i].key); 86 | 87 | std::unique_ptr value = PbString::LoadFromSharedMemory( 88 | shm_pool, (pair_shms.data_.get())[i].value); 89 | 90 | map.insert({key->String(), value->String()}); 91 | pb_strings.emplace_back(std::move(key)); 92 | pb_strings.emplace_back(std::move(value)); 93 | } 94 | 95 | return 
std::unique_ptr( 96 | new PbMap(pb_strings, dict_shm, pair_shms, map)); 97 | } 98 | 99 | PbMap::PbMap( 100 | std::vector>& strings, 101 | AllocatedSharedMemory& dict_shm, 102 | AllocatedSharedMemory& pair_shms, 103 | std::unordered_map& map) 104 | : strings_(std::move(strings)), dict_shm_(std::move(dict_shm)), 105 | pair_shms_(std::move(pair_shms)), map_(std::move(map)) 106 | { 107 | dict_handle_ = dict_shm.handle_; 108 | } 109 | 110 | }}} // namespace triton::backend::python 111 | -------------------------------------------------------------------------------- /src/pb_map.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "pb_string.h" 32 | #include "shm_manager.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | struct PairShm { 37 | bi::managed_external_buffer::handle_t key; 38 | bi::managed_external_buffer::handle_t value; 39 | }; 40 | 41 | struct DictShm { 42 | uint32_t length; 43 | // `values` point to the location where there are `length` of Pair objects. 
44 | bi::managed_external_buffer::handle_t values; 45 | }; 46 | 47 | 48 | class PbMap { 49 | public: 50 | static std::unique_ptr Create( 51 | std::unique_ptr& shm_pool, 52 | std::unordered_map& map); 53 | static std::unique_ptr LoadFromSharedMemory( 54 | std::unique_ptr& shm_pool, 55 | bi::managed_external_buffer::handle_t handle); 56 | const std::unordered_map& UnorderedMap(); 57 | bi::managed_external_buffer::handle_t ShmHandle(); 58 | 59 | private: 60 | PbMap( 61 | std::vector>& strings, 62 | AllocatedSharedMemory& dict_shm, 63 | AllocatedSharedMemory& pair_shms, 64 | std::unordered_map& map); 65 | 66 | std::vector> strings_; 67 | AllocatedSharedMemory dict_shm_; 68 | AllocatedSharedMemory pair_shms_; 69 | bi::managed_external_buffer::handle_t dict_handle_; 70 | std::unordered_map map_; 71 | }; 72 | }}} // namespace triton::backend::python 73 | -------------------------------------------------------------------------------- /src/pb_metric_reporter.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "pb_metric_reporter.h" 28 | 29 | #include "triton/backend/backend_common.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | 33 | PbMetricReporter::PbMetricReporter( 34 | TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, 35 | const uint32_t request_count, 36 | std::shared_ptr> responses) 37 | : instance_(instance), requests_(requests), request_count_(request_count), 38 | responses_(responses), total_batch_size_(0), exec_start_ns_(0), 39 | compute_start_ns_(0), compute_end_ns_(0), exec_end_ns_(0), 40 | success_status_(true) 41 | { 42 | } 43 | 44 | PbMetricReporter::~PbMetricReporter() 45 | { 46 | for (uint32_t r = 0; r < request_count_; ++r) { 47 | TRITONBACKEND_Request* request = requests_[r]; 48 | 49 | // Report statistics for the request. Note that there could 50 | // still be responses that have not yet been sent but those 51 | // cannot be captured in the statistics as they reflect only the 52 | // request object. We use the execution start/end time for 53 | // compute also so that the entire execution time is associated 54 | // with the inference computation. 
55 | if (responses_) { 56 | LOG_IF_ERROR( 57 | TRITONBACKEND_ModelInstanceReportStatistics( 58 | instance_, request, ((*responses_)[r] != nullptr) /* success */, 59 | exec_start_ns_, compute_start_ns_, compute_end_ns_, exec_end_ns_), 60 | "failed reporting request statistics"); 61 | } else { 62 | LOG_IF_ERROR( 63 | TRITONBACKEND_ModelInstanceReportStatistics( 64 | instance_, request, success_status_, exec_start_ns_, 65 | compute_start_ns_, compute_end_ns_, exec_end_ns_), 66 | "failed reporting request statistics"); 67 | } 68 | } 69 | 70 | // Report the entire batch statistics. This backend does not support 71 | // batching so the total batch size is always 1. 72 | if (total_batch_size_ != 0) { 73 | LOG_IF_ERROR( 74 | TRITONBACKEND_ModelInstanceReportBatchStatistics( 75 | instance_, total_batch_size_, exec_start_ns_, compute_start_ns_, 76 | compute_end_ns_, exec_end_ns_), 77 | "failed reporting batch request statistics"); 78 | } 79 | } 80 | 81 | void 82 | PbMetricReporter::SetBatchStatistics(size_t total_batch_size) 83 | { 84 | total_batch_size_ = total_batch_size; 85 | } 86 | 87 | void 88 | PbMetricReporter::SetExecStartNs(const uint64_t exec_start_ns) 89 | { 90 | exec_start_ns_ = exec_start_ns; 91 | } 92 | 93 | void 94 | PbMetricReporter::SetComputeStartNs(const uint64_t compute_start_ns) 95 | { 96 | compute_start_ns_ = compute_start_ns; 97 | } 98 | 99 | void 100 | PbMetricReporter::SetComputeEndNs(const uint64_t compute_end_ns) 101 | { 102 | compute_end_ns_ = compute_end_ns; 103 | } 104 | 105 | void 106 | PbMetricReporter::SetExecEndNs(const uint64_t exec_end_ns) 107 | { 108 | exec_end_ns_ = exec_end_ns; 109 | } 110 | 111 | void 112 | PbMetricReporter::SetSuccessStatus(const bool success_status) 113 | { 114 | success_status_ = success_status; 115 | } 116 | 117 | }}} // namespace triton::backend::python 118 | -------------------------------------------------------------------------------- /src/pb_metric_reporter.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | #include "triton/core/tritonbackend.h" 34 | 35 | namespace triton { namespace backend { namespace python { 36 | class PbMetricReporter { 37 | TRITONBACKEND_ModelInstance* instance_; 38 | TRITONBACKEND_Request** requests_; 39 | uint32_t request_count_; 40 | std::shared_ptr> responses_; 41 | size_t total_batch_size_; 42 | uint64_t exec_start_ns_; 43 | uint64_t compute_start_ns_; 44 | uint64_t compute_end_ns_; 45 | uint64_t exec_end_ns_; 46 | bool success_status_; 47 | 48 | public: 49 | PbMetricReporter( 50 | TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, 51 | const uint32_t request_count, 52 | std::shared_ptr> responses); 53 | ~PbMetricReporter(); 54 | void SetBatchStatistics(size_t total_batch_size); 55 | void SetExecStartNs(const uint64_t exec_start_ns); 56 | void SetComputeStartNs(const uint64_t compute_start_ns); 57 | void SetComputeEndNs(const uint64_t compute_end_ns); 58 | void SetExecEndNs(const uint64_t exec_end_ns); 59 | void SetSuccessStatus(const bool success_status); 60 | }; 61 | }}}; // namespace triton::backend::python 62 | -------------------------------------------------------------------------------- /src/pb_preferred_memory.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 
11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | namespace triton { namespace backend { namespace python { 30 | 31 | class PreferredMemory { 32 | public: 33 | enum MemoryType { kGPU, kCPU, kDefault }; 34 | 35 | PreferredMemory() 36 | : preferred_memory_type_(MemoryType::kDefault), preferred_device_id_(0) 37 | { 38 | } 39 | 40 | PreferredMemory( 41 | const MemoryType& preferred_memory_type, 42 | const int64_t& preferred_device_id) 43 | : preferred_memory_type_(preferred_memory_type), 44 | preferred_device_id_(preferred_device_id) 45 | { 46 | } 47 | 48 | MemoryType PreferredMemoryType() { return preferred_memory_type_; } 49 | 50 | int64_t PreferredDeviceId() { return preferred_device_id_; } 51 | 52 | private: 53 | MemoryType preferred_memory_type_; 54 | int64_t preferred_device_id_; 55 | }; 56 | 57 | }}} // namespace triton::backend::python 58 | -------------------------------------------------------------------------------- /src/pb_response_iterator.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "infer_response.h" 32 | #include "pb_bls_cancel.h" 33 | 34 | namespace triton { namespace backend { namespace python { 35 | 36 | class ResponseIterator { 37 | public: 38 | ResponseIterator(const std::shared_ptr& response); 39 | ~ResponseIterator(); 40 | 41 | std::shared_ptr Next(); 42 | void Iter(); 43 | void EnqueueResponse(std::shared_ptr infer_response); 44 | void* Id(); 45 | void Clear(); 46 | std::vector> GetExistingResponses(); 47 | void Cancel(); 48 | 49 | private: 50 | std::vector> responses_; 51 | std::queue> response_buffer_; 52 | std::mutex mu_; 53 | std::condition_variable cv_; 54 | void* id_; 55 | bool is_finished_; 56 | bool is_cleared_; 57 | size_t idx_; 58 | std::shared_ptr pb_bls_cancel_; 59 | }; 60 | 61 | }}} // namespace triton::backend::python 62 | -------------------------------------------------------------------------------- /src/pb_string.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include "shm_manager.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | 33 | struct StringShm { 34 | bi::managed_external_buffer::handle_t data; 35 | size_t length; 36 | }; 37 | 38 | class PbString { 39 | public: 40 | static std::unique_ptr Create( 41 | std::unique_ptr& shm_pool, 42 | const std::string& string); 43 | static std::unique_ptr Create( 44 | const std::string& string, char* data_shm, 45 | bi::managed_external_buffer::handle_t handle); 46 | static std::unique_ptr LoadFromSharedMemory( 47 | std::unique_ptr& shm_pool, 48 | bi::managed_external_buffer::handle_t handle); 49 | static std::unique_ptr LoadFromSharedMemory( 50 | bi::managed_external_buffer::handle_t handle, char* data_shm); 51 | static std::size_t ShmStructSize(const std::string& string); 52 | 53 | char* MutableString() { return string_shm_ptr_; } 54 | std::string String() 55 | { 56 | return std::string( 57 | string_shm_ptr_, string_shm_ptr_ + string_container_shm_ptr_->length); 58 | } 59 | bi::managed_external_buffer::handle_t ShmHandle(); 60 | std::size_t Size(); 61 | 62 | private: 63 | AllocatedSharedMemory string_container_shm_; 64 | StringShm* string_container_shm_ptr_; 65 | 66 | AllocatedSharedMemory string_shm_; 67 | char* string_shm_ptr_; 68 | 69 | bi::managed_external_buffer::handle_t string_handle_; 70 | 71 | PbString( 72 | AllocatedSharedMemory& string_container_shm, 73 | AllocatedSharedMemory& string_shm); 74 | 75 | PbString( 76 | StringShm* string_container_shm, char* string_shm, 77 | bi::managed_external_buffer::handle_t handle); 78 | }; 79 | 80 | }}} // namespace triton::backend::python 81 | -------------------------------------------------------------------------------- /src/pb_stub_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "triton/core/tritonserver.h" 33 | 34 | namespace py = pybind11; 35 | namespace triton { namespace backend { namespace python { 36 | 37 | /// Convert numpy dtype to triton dtype 38 | /// \param data_type numpy data type to be converted. 
39 | /// \return equivalent triton dtype 40 | TRITONSERVER_DataType numpy_to_triton_type(py::object data_type); 41 | 42 | /// Convert triton dtype to numpy dtype 43 | /// \param data_type triton dtype to be converted. 44 | /// \return equivalent numpy data type. 45 | py::object triton_to_numpy_type(TRITONSERVER_DataType data_type); 46 | 47 | /// Convert triton dtype to dlpack dtype 48 | /// \param data_type triton dtype to be converted 49 | /// \return equivalent DLPack data type. 50 | DLDataType triton_to_dlpack_type(TRITONSERVER_DataType data_type); 51 | 52 | /// Convert dlpack type to triton type 53 | /// \param data_type dlpack dtype to be converted 54 | /// \return equivalent Triton dtype 55 | TRITONSERVER_DataType dlpack_to_triton_type(const DLDataType& data_type); 56 | 57 | /// Convert triton dtype to pybind dtype. 58 | /// \param data_type triton dtype to be converted. 59 | /// \return equivalent pybind numpy dtype. 60 | py::dtype triton_to_pybind_dtype(TRITONSERVER_DataType data_type); 61 | }}} // namespace triton::backend::python 62 | -------------------------------------------------------------------------------- /src/request_executor.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 
11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | #pragma once 28 | 29 | #include 30 | 31 | #include "infer_payload.h" 32 | #include "infer_request.h" 33 | #include "infer_response.h" 34 | 35 | namespace triton { namespace backend { namespace python { 36 | 37 | TRITONSERVER_Error* CreateTritonErrorFromException( 38 | const PythonBackendException& pb_exception); 39 | 40 | struct RequestCompletionUserp { 41 | std::shared_ptr infer_payload; 42 | RequestCompletionUserp(std::shared_ptr& infer_payload) 43 | : infer_payload(infer_payload){}; 44 | }; 45 | 46 | class RequestExecutor { 47 | TRITONSERVER_ResponseAllocator* response_allocator_ = nullptr; 48 | TRITONSERVER_Server* server_; 49 | std::unique_ptr& shm_pool_; 50 | 51 | public: 52 | std::future> Infer( 53 | std::shared_ptr& infer_request, 54 | std::shared_ptr& infer_payload); 55 | 56 | RequestExecutor( 57 | std::unique_ptr& shm_pool, 58 | TRITONSERVER_Server* server); 59 | 60 | ~RequestExecutor(); 61 | }; 62 | }}} // namespace triton::backend::python 63 | -------------------------------------------------------------------------------- /src/response_sender.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | 29 | #include 30 | #include 31 | 32 | #include "infer_response.h" 33 | #include "pb_cancel.h" 34 | #include "shm_manager.h" 35 | 36 | namespace triton { namespace backend { namespace python { 37 | 38 | class ResponseSender { 39 | public: 40 | ResponseSender( 41 | intptr_t request_address, intptr_t response_factory_address, 42 | bool const* is_decoupled, 43 | const std::set& requested_output_names, 44 | std::unique_ptr& shm_pool, 45 | const std::shared_ptr& pb_cancel); 46 | intptr_t ResponseFactory() { return response_factory_address_; } 47 | ~ResponseSender(); 48 | void Send(std::shared_ptr response, const uint32_t flags); 49 | bool IsCancelled(); 50 | void UpdateStateAndCounters(InferResponse* response, const uint32_t flags); 51 | 52 | // Can be used to stop the model from sending any more responses. 
53 | void Close(); 54 | bool IsClosed(); 55 | 56 | private: 57 | void DeleteResponseFactory(); 58 | 59 | intptr_t request_address_; 60 | intptr_t response_factory_address_; 61 | bool const* is_decoupled_; 62 | std::set requested_output_names_; 63 | std::unique_ptr& shm_pool_; 64 | std::shared_ptr pb_cancel_; 65 | 66 | std::mutex mu_; 67 | bool closed_; 68 | size_t number_of_response_sent_; 69 | 70 | std::atomic response_factory_deleted_; 71 | }; 72 | }}} // namespace triton::backend::python 73 | -------------------------------------------------------------------------------- /src/scoped_defer.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include "scoped_defer.h" 28 | 29 | namespace triton { namespace backend { namespace python { 30 | ScopedDefer::ScopedDefer(std::function task) 31 | { 32 | task_ = task; 33 | done_ = false; 34 | } 35 | 36 | void 37 | ScopedDefer::Complete() 38 | { 39 | if (!done_) { 40 | task_(); 41 | done_ = true; 42 | } 43 | } 44 | 45 | ScopedDefer::~ScopedDefer() 46 | { 47 | if (!done_) { 48 | task_(); 49 | } 50 | } 51 | 52 | }}}; // namespace triton::backend::python 53 | -------------------------------------------------------------------------------- /src/scoped_defer.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 
11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #pragma once 28 | #include 29 | 30 | namespace triton { namespace backend { namespace python { 31 | class ScopedDefer { 32 | public: 33 | ScopedDefer(std::function task); 34 | ~ScopedDefer(); 35 | void Complete(); 36 | 37 | private: 38 | std::function task_; 39 | bool done_; 40 | }; 41 | 42 | }}} // namespace triton::backend::python 43 | -------------------------------------------------------------------------------- /src/shm_monitor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 
8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | cmake_minimum_required (VERSION 3.18) 28 | 29 | pybind11_add_module( 30 | triton-shm-monitor 31 | EXCLUDE_FROM_ALL 32 | ./shm_monitor.cc 33 | ../shm_manager.h 34 | ../shm_manager.cc 35 | ) 36 | 37 | target_link_libraries( 38 | triton-shm-monitor 39 | PRIVATE 40 | -lrt # shared memory 41 | ) 42 | 43 | set_property(TARGET triton-shm-monitor PROPERTY OUTPUT_NAME triton_shm_monitor) 44 | 45 | install( 46 | TARGETS 47 | triton-shm-monitor 48 | LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/python OPTIONAL 49 | ) 50 | -------------------------------------------------------------------------------- /src/shm_monitor/shm_monitor.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | #include 28 | 29 | #include "../shm_manager.h" 30 | 31 | namespace triton { namespace backend { namespace python { 32 | namespace py = pybind11; 33 | 34 | PYBIND11_MODULE(triton_shm_monitor, m) 35 | { 36 | py::class_(m, "SharedMemoryManager") 37 | .def(py::init()) 38 | .def("free_memory", &SharedMemoryManager::FreeMemory); 39 | } 40 | 41 | }}} // namespace triton::backend::python 42 | --------------------------------------------------------------------------------