├── .clang-format
├── .devcontainer
    ├── Dockerfile
    └── devcontainer.json
├── .github
    └── workflows
    │   ├── codeql.yml
    │   └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .vscode
    └── tasks.json
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
    └── TritonPythonBackendConfig.cmake.in
├── examples
    ├── add_sub
    │   ├── client.py
    │   ├── config.pbtxt
    │   └── model.py
    ├── auto_complete
    │   ├── README.md
    │   ├── batch_model.py
    │   ├── client.py
    │   └── nobatch_model.py
    ├── bls
    │   ├── README.md
    │   ├── async_client.py
    │   ├── async_config.pbtxt
    │   ├── async_model.py
    │   ├── sync_client.py
    │   ├── sync_config.pbtxt
    │   └── sync_model.py
    ├── bls_decoupled
    │   ├── README.md
    │   ├── async_client.py
    │   ├── async_config.pbtxt
    │   ├── async_model.py
    │   ├── sync_client.py
    │   ├── sync_config.pbtxt
    │   └── sync_model.py
    ├── custom_metrics
    │   ├── README.md
    │   ├── client.py
    │   ├── config.pbtxt
    │   └── model.py
    ├── decoupled
    │   ├── README.md
    │   ├── repeat_client.py
    │   ├── repeat_config.pbtxt
    │   ├── repeat_model.py
    │   ├── square_client.py
    │   ├── square_config.pbtxt
    │   └── square_model.py
    ├── instance_kind
    │   ├── README.md
    │   ├── client.py
    │   ├── config.pbtxt
    │   ├── model.py
    │   └── resnet50_labels.txt
    ├── jax
    │   ├── README.md
    │   ├── client.py
    │   ├── config.pbtxt
    │   └── model.py
    ├── preprocessing
    │   ├── README.md
    │   ├── client.py
    │   ├── model.py
    │   ├── model_repository
    │   │   ├── ensemble_python_resnet50
    │   │   │   └── config.pbtxt
    │   │   ├── preprocess
    │   │   │   └── config.pbtxt
    │   │   └── resnet50_trt
    │   │   │   ├── config.pbtxt
    │   │   │   └── labels.txt
    │   └── onnx_exporter.py
    └── pytorch
    │   ├── client.py
    │   ├── config.pbtxt
    │   └── model.py
├── inferentia
    ├── README.md
    ├── qa
    │   ├── Dockerfile.QA
    │   └── setup_test_enviroment_and_test.sh
    └── scripts
    │   ├── gen_triton_model.py
    │   ├── setup-pre-container.sh
    │   └── setup.sh
├── pyproject.toml
└── src
    ├── correlation_id.cc
    ├── correlation_id.h
    ├── gpu_buffers.cc
    ├── gpu_buffers.h
    ├── infer_payload.cc
    ├── infer_payload.h
    ├── infer_request.cc
    ├── infer_request.h
    ├── infer_response.cc
    ├── infer_response.h
    ├── infer_trace.cc
    ├── infer_trace.h
    ├── ipc_message.cc
    ├── ipc_message.h
    ├── libtriton_python.ldscript
    ├── memory_manager.cc
    ├── memory_manager.h
    ├── message_queue.h
    ├── metric.cc
    ├── metric.h
    ├── metric_family.cc
    ├── metric_family.h
    ├── model_loader.cc
    ├── model_loader.h
    ├── pb_bls_cancel.cc
    ├── pb_bls_cancel.h
    ├── pb_cancel.cc
    ├── pb_cancel.h
    ├── pb_env.cc
    ├── pb_env.h
    ├── pb_error.cc
    ├── pb_error.h
    ├── pb_exception.h
    ├── pb_log.cc
    ├── pb_log.h
    ├── pb_map.cc
    ├── pb_map.h
    ├── pb_memory.cc
    ├── pb_memory.h
    ├── pb_metric_reporter.cc
    ├── pb_metric_reporter.h
    ├── pb_preferred_memory.h
    ├── pb_response_iterator.cc
    ├── pb_response_iterator.h
    ├── pb_string.cc
    ├── pb_string.h
    ├── pb_stub.cc
    ├── pb_stub.h
    ├── pb_stub_utils.cc
    ├── pb_stub_utils.h
    ├── pb_tensor.cc
    ├── pb_tensor.h
    ├── pb_utils.cc
    ├── pb_utils.h
    ├── python_be.cc
    ├── python_be.h
    ├── request_executor.cc
    ├── request_executor.h
    ├── resources
        └── triton_python_backend_utils.py
    ├── response_sender.cc
    ├── response_sender.h
    ├── scoped_defer.cc
    ├── scoped_defer.h
    ├── shm_manager.cc
    ├── shm_manager.h
    ├── shm_monitor
        ├── CMakeLists.txt
        └── shm_monitor.cc
    ├── stub_launcher.cc
    └── stub_launcher.h


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | BasedOnStyle: Google
 3 | 
 4 | IndentWidth: 2
 5 | ColumnLimit: 80
 6 | ContinuationIndentWidth: 4
 7 | UseTab: Never
 8 | MaxEmptyLinesToKeep: 2
 9 | 
10 | SortIncludes: true
11 | CompactNamespaces: true
12 | ReflowComments: true
13 | 
14 | DerivePointerAlignment: false
15 | PointerAlignment: Left
16 | 
17 | AllowShortIfStatementsOnASingleLine: false
18 | AllowShortBlocksOnASingleLine: false
19 | AllowShortFunctionsOnASingleLine: Inline
20 | 
21 | AlwaysBreakAfterReturnType: TopLevelDefinitions
22 | AlignAfterOpenBracket: AlwaysBreak
23 | BreakBeforeBraces: Custom
24 | BraceWrapping:
25 |   AfterClass: false
26 |   AfterControlStatement: false
27 |   AfterEnum: false
28 |   AfterFunction: true
29 |   AfterNamespace: false
30 |   AfterStruct: false
31 |   AfterUnion: false
32 |   BeforeCatch: true
33 | 
34 | BinPackArguments: true
35 | BinPackParameters: true
36 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
37 | 
38 | IndentCaseLabels: true
39 | 


--------------------------------------------------------------------------------
/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | FROM nvcr.io/nvidia/tritonserver:24.03-py3
28 | 
29 | ARG USERNAME=triton-server
30 | 
31 | # Install every system package in a single layer so `apt-get update` and
32 | # `apt-get install` always run together and never use a stale cached index.
33 | RUN apt-get update \
34 |     && apt-get install -y sudo cmake rapidjson-dev
35 | 
36 | RUN pip3 install transformers torch
37 | 
38 | # Give the user passwordless sudo
39 | RUN echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
40 |     && chmod 0440 /etc/sudoers.d/$USERNAME
41 | 
42 | RUN pip3 install pre-commit ipdb
43 | 
44 | # Create the home directory for ${USERNAME} (the account itself is assumed
45 | # to already exist in the base image -- TODO confirm).
46 | RUN mkhomedir_helper ${USERNAME}
47 | 
48 | USER ${USERNAME}
49 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"name": "Python Backend",
 3 | 
 4 | 	"build": {
 5 | 		"dockerfile": "Dockerfile"
 6 | 	},
 7 | 	"customizations": {
 8 | 		"vscode": {
 9 | 			"extensions": [
10 | 				"ms-python.vscode-pylance",
11 | 				"ms-python.python",
12 | 				"ms-vscode.cpptools-extension-pack",
13 | 				"ms-vscode.cmake-tools",
14 | 				"github.vscode-pull-request-github"
15 | 			]
16 | 		}
17 | 	},
18 | 	"postCreateCommand": "sudo chown -R triton-server:triton-server ~/.cache",
19 | 
20 | 	"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined", "--gpus=all", "--shm-size=2g", "--ulimit", "stack=67108864" ],
21 | 	"mounts": [
22 | 		"source=${localEnv:HOME}/.ssh,target=/home/triton-server/.ssh,type=bind,consistency=cached",
23 | 		"source=${localEnv:HOME}/.cache/huggingface,target=/home/triton-server/.cache/huggingface,type=bind,consistency=cached"
24 | 	],
25 | 	"remoteUser": "triton-server"
26 | }
27 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "CodeQL"
28 | 
29 | on:
30 |   pull_request:
31 | 
32 | jobs:
33 |   analyze:
34 |     name: Analyze
35 |     runs-on: ubuntu-latest
36 |     permissions:
37 |       actions: read
38 |       contents: read
39 |       security-events: write
40 | 
41 |     strategy:
42 |       fail-fast: false
43 |       matrix:
44 |         language: [ 'python' ]
45 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
46 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
47 | 
48 |     steps:
49 |     - name: Checkout repository
50 |       uses: actions/checkout@v3
51 | 
52 |     # Initializes the CodeQL tools for scanning.
53 |     - name: Initialize CodeQL
54 |       uses: github/codeql-action/init@v2
55 |       with:
56 |         languages: ${{ matrix.language }}
57 |         # If you wish to specify custom queries, you can do so here or in a config file.
58 |         # By default, queries listed here will override any specified in a config file.
59 |         # Prefix the list here with "+" to use these queries and those in the config file.
60 | 
61 |         # Details on CodeQL's query packs refer to:
62 |         # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
63 |         queries: +security-and-quality
64 | 
65 | 
66 |     # Autobuild attempts to build any compiled languages  (C/C++, C#, Go, or Java).
67 |     # If this step fails, then you should remove it and run the build manually (see below)
68 |     - name: Autobuild
69 |       uses: github/codeql-action/autobuild@v2
70 | 
71 |     # Command-line programs to run using the OS shell.
72 |     # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
73 | 
74 |     #   If the Autobuild fails above, remove it and uncomment the following three lines,
75 |     #   then modify them (or add more) to build your code. Please refer to the EXAMPLE below for guidance.
76 | 
77 |     # - run: |
78 |     #   echo "Run, Build Application using script"
79 |     #   ./location_of_script_within_repo/buildscript.sh
80 | 
81 |     - name: Perform CodeQL Analysis
82 |       uses: github/codeql-action/analyze@v2
83 |       with:
84 |         category: "/language:${{matrix.language}}"
85 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: pre-commit
28 | 
29 | on:
30 |   pull_request:
31 | 
32 | jobs:
33 |   pre-commit:
34 |     runs-on: ubuntu-22.04
35 |     steps:
36 |     - uses: actions/checkout@v3
37 |     - uses: actions/setup-python@v3
38 |     - uses: pre-commit/action@v3.0.0
39 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /build
  2 | *.so
  3 | builddir
  4 | 
  5 | ### Python ###
  6 | # Byte-compiled / optimized / DLL files
  7 | __pycache__/
  8 | *.py[cod]
  9 | *$py.class
 10 | 
 11 | # C extensions
 12 | *.so
 13 | 
 14 | # Distribution / packaging
 15 | .Python
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | pip-wheel-metadata/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | *.py,cover
 56 | .hypothesis/
 57 | .pytest_cache/
 58 | pytestdebug.log
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | doc/_build/
 80 | 
 81 | # PyBuilder
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pyenv
 92 | .python-version
 93 | 
 94 | # pipenv
 95 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 96 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 97 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 98 | #   install all needed dependencies.
 99 | #Pipfile.lock
100 | 
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 | 
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 | 
108 | # SageMath parsed files
109 | *.sage.py
110 | 
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 | 
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 | 
124 | # Rope project settings
125 | .ropeproject
126 | 
127 | # mkdocs documentation
128 | /site
129 | 
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 | 
135 | # Pyre type checker
136 | .pyre/
137 | 
138 | # pytype static type analyzer
139 | .pytype/
140 | 
141 | # vscode
142 | .vscode/settings.json
143 | .vscode/c_cpp_properties.json
144 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | repos:
28 | - repo: https://github.com/timothycrosley/isort
29 |   rev: 5.12.0
30 |   hooks:
31 |   - id: isort
32 |     additional_dependencies: [toml]
33 | - repo: https://github.com/psf/black
34 |   rev: 23.1.0
35 |   hooks:
36 |   - id: black
37 |     types_or: [python, cython]
38 | - repo: https://github.com/PyCQA/flake8
39 |   rev: 5.0.4
40 |   hooks:
41 |   - id: flake8
42 |     args: ["--max-line-length=88", "--select=C,E,F,W,B,B950", "--extend-ignore=E203,E501"]  # quoted: a bare comma ends a YAML flow-sequence item; NOTE(review): E/F/W checks now really run, so existing code may need cleanup
43 |     types_or: [python, cython]
44 | - repo: https://github.com/pre-commit/mirrors-clang-format
45 |   rev: v16.0.5
46 |   hooks:
47 |   - id: clang-format
48 |     types_or: [c, c++, cuda, proto, textproto, java]
49 |     args: ["-fallback-style=none", "-style=file", "-i"]
50 | - repo: https://github.com/codespell-project/codespell
51 |   rev: v2.2.4
52 |   hooks:
53 |   - id: codespell
54 |     additional_dependencies: [tomli]
55 |     args: ["--toml", "pyproject.toml"]
56 |     exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
57 | # More details about these pre-commit hooks here:
58 | # https://pre-commit.com/hooks.html
59 | - repo: https://github.com/pre-commit/pre-commit-hooks
60 |   rev: v4.4.0
61 |   hooks:
62 |   - id: check-case-conflict
63 |   - id: check-executables-have-shebangs
64 |   - id: check-merge-conflict
65 |   - id: check-json
66 |   - id: check-toml
67 |   - id: check-yaml
68 |   - id: check-shebang-scripts-are-executable
69 |   - id: end-of-file-fixer
70 |     types_or: [c, c++, cuda, proto, textproto, java, python]
71 |   - id: mixed-line-ending
72 |   - id: requirements-txt-fixer
73 |   - id: trailing-whitespace
74 | 


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "2.0.0",
 3 |     "tasks": [
 4 |         {
 5 |             "label": "Configure",
 6 |             "type": "shell",
 7 |             "command": "cmake",
 8 |             "args": [
 9 |                 "-DCMAKE_INSTALL_PREFIX:STRING=/opt/tritonserver/",
10 |                 "-DTRITON_COMMON_REPO_TAG:STRING=main",
11 |                 "-DTRITON_BACKEND_REPO_TAG:STRING=main",
12 |                 "-DTRITON_CORE_REPO_TAG:STRING=main",
13 |                 "-DTRITON_ENABLE_GPU:STRING=ON",
14 |                 "-DTRITON_ENABLE_NVTX:STRING=ON",
15 |                 "-DCMAKE_INSTALL_PREFIX:STRING=${workspaceFolder}/build/install",
16 |                 "-DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE",
17 |                 "-DCMAKE_BUILD_TYPE:STRING=Debug",
18 |                 "-DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc",
19 |                 "-DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++",
20 |                 "-S${workspaceFolder}",
21 |                 "-B${workspaceFolder}/build",
22 |                 "-G",
23 |                 "Unix Makefiles"
24 |             ],
25 |             "problemMatcher": []
26 |         },
27 |         {
28 |             "label": "Build",
29 |             "type": "shell",
30 |             "command": "cmake",
31 |             "args": [
32 |                 "--build",
33 |                 "${workspaceFolder}/build",
34 |                 "--config",
35 |                 "Debug",
36 |                 "--target",
37 |                 "all",
38 |                 "-j",
39 |                 "18",
40 |                 "--"
41 |             ]
42 |         },
43 |         {
44 |             "label": "Install",
45 |             "type": "shell",
46 |             "command": "cmake",
47 |             "args": [
48 |                 "--build",
49 |                 "${workspaceFolder}/build",
50 |                 "--config",
51 |                 "Debug",
52 |                 "--target",
53 |                 "install",
54 |                 "-j",
55 |                 "18",
56 |                 "--"
57 |             ]
58 |         },
59 |         {
60 |             "label": "Move",
61 |             "type": "shell",
62 |             "command": "sudo",
63 |             "args": [
64 |                 "cp",
65 |                 "-r",
66 |                 "${workspaceFolder}/build/install/backends/python/*",
67 |                 "/opt/tritonserver/backends/python"
68 |             ]
69 |         },
70 |         {
71 |             "label": "Build Python Backend",
72 |             "dependsOrder": "sequence",
73 |             "dependsOn": [
74 |                 "Configure",
75 |                 "Build",
76 |                 "Install",
77 |                 "Move"
78 |             ],
79 |             "group": {
80 |                 "kind": "build",
81 |                 "isDefault": true
82 |             }
83 |         }
84 |     ]
85 | }
86 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions
 5 | are met:
 6 |  * Redistributions of source code must retain the above copyright
 7 |    notice, this list of conditions and the following disclaimer.
 8 |  * Redistributions in binary form must reproduce the above copyright
 9 |    notice, this list of conditions and the following disclaimer in the
10 |    documentation and/or other materials provided with the distribution.
11 |  * Neither the name of NVIDIA CORPORATION nor the names of its
12 |    contributors may be used to endorse or promote products derived
13 |    from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 


--------------------------------------------------------------------------------
/cmake/TritonPythonBackendConfig.cmake.in:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Redistribution and use in source and binary forms, with or without
 5 | # modification, are permitted provided that the following conditions
 6 | # are met:
 7 | #  * Redistributions of source code must retain the above copyright
 8 | #    notice, this list of conditions and the following disclaimer.
 9 | #  * Redistributions in binary form must reproduce the above copyright
10 | #    notice, this list of conditions and the following disclaimer in the
11 | #    documentation and/or other materials provided with the distribution.
12 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
13 | #    contributors may be used to endorse or promote products derived
14 | #    from this software without specific prior written permission.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 
28 | include(CMakeFindDependencyMacro)
29 | 
30 | get_filename_component(
31 |   TRITONPYTHONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
32 | )
33 | 
34 | list(APPEND CMAKE_MODULE_PATH ${TRITONPYTHONBACKEND_CMAKE_DIR})
35 | 
36 | if(NOT TARGET TritonPythonBackend::triton-python-backend)
37 |   include("${TRITONPYTHONBACKEND_CMAKE_DIR}/TritonPythonBackendTargets.cmake")
38 | endif()
39 | 
40 | set(TRITONPYTHONBACKEND_LIBRARIES TritonPythonBackend::triton-python-backend)


--------------------------------------------------------------------------------
/examples/add_sub/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | model_name = "add_sub"
34 | shape = [4]
35 | 
36 | with httpclient.InferenceServerClient("localhost:8000") as client:
37 |     input0_data = np.random.rand(*shape).astype(np.float32)
38 |     input1_data = np.random.rand(*shape).astype(np.float32)
39 |     inputs = [
40 |         httpclient.InferInput(
41 |             "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
42 |         ),
43 |         httpclient.InferInput(
44 |             "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
45 |         ),
46 |     ]
47 | 
48 |     inputs[0].set_data_from_numpy(input0_data)
49 |     inputs[1].set_data_from_numpy(input1_data)
50 | 
51 |     outputs = [
52 |         httpclient.InferRequestedOutput("OUTPUT0"),
53 |         httpclient.InferRequestedOutput("OUTPUT1"),
54 |     ]
55 | 
56 |     response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
57 | 
58 |     result = response.get_response()
59 |     output0_data = response.as_numpy("OUTPUT0")
60 |     output1_data = response.as_numpy("OUTPUT1")
61 | 
62 |     print(
63 |         "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
64 |             input0_data, input1_data, output0_data
65 |         )
66 |     )
67 |     print(
68 |         "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
69 |             input0_data, input1_data, output1_data
70 |         )
71 |     )
72 | 
73 |     if not np.allclose(input0_data + input1_data, output0_data):
74 |         print("add_sub example error: incorrect sum")
75 |         sys.exit(1)
76 | 
77 |     if not np.allclose(input0_data - input1_data, output1_data):
78 |         print("add_sub example error: incorrect difference")
79 |         sys.exit(1)
80 | 
81 |     print("PASS: add_sub")
82 |     sys.exit(0)
83 | 


--------------------------------------------------------------------------------
/examples/add_sub/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "add_sub"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "INPUT0"
33 |     data_type: TYPE_FP32
34 |     dims: [ 4 ]
35 |   }
36 | ]
37 | input [
38 |   {
39 |     name: "INPUT1"
40 |     data_type: TYPE_FP32
41 |     dims: [ 4 ]
42 |   }
43 | ]
44 | output [
45 |   {
46 |     name: "OUTPUT0"
47 |     data_type: TYPE_FP32
48 |     dims: [ 4 ]
49 |   }
50 | ]
51 | output [
52 |   {
53 |     name: "OUTPUT1"
54 |     data_type: TYPE_FP32
55 |     dims: [ 4 ]
56 |   }
57 | ]
58 | 
59 | instance_group [{ kind: KIND_CPU }]
60 | 


--------------------------------------------------------------------------------
/examples/auto_complete/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | nobatch_model_name = "nobatch_auto_complete"
34 | batch_model_name = "batch_auto_complete"
35 | 
36 | 
37 | def validate_ios(config, expected_ios, model_name):
38 |     for io in config:
39 |         for expected_io in expected_ios:
40 |             if io["name"] == expected_io["name"]:
41 |                 if io["data_type"] != expected_io["data_type"]:
42 |                     print("model '" + model_name + "' has unexpected data_type")
43 |                     sys.exit(1)
44 |                 elif io["dims"] != expected_io["dims"]:
45 |                     print("model '" + model_name + "' has unexpected dims")
46 |                     sys.exit(1)
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     with httpclient.InferenceServerClient("localhost:8000") as client:
51 |         expected_max_batch_size = {
52 |             "nobatch_auto_complete": 0,
53 |             "batch_auto_complete": 4,
54 |         }
55 |         expected_inputs = [
56 |             {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]},
57 |             {"name": "INPUT1", "data_type": "TYPE_FP32", "dims": [4]},
58 |         ]
59 |         expected_outputs = [
60 |             {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]},
61 |             {"name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [4]},
62 |         ]
63 | 
64 |         models = [nobatch_model_name, batch_model_name]
65 | 
66 |         for model_name in models:
67 |             # Validate the auto-complete model configuration
68 |             model_config = client.get_model_config(model_name)
69 |             if model_config["max_batch_size"] != expected_max_batch_size[model_name]:
70 |                 print("model '" + model_name + "' has unexpected max_batch_size")
71 |                 sys.exit(1)
72 |             validate_ios(model_config["input"], expected_inputs, model_name)
73 |             validate_ios(model_config["output"], expected_outputs, model_name)
74 |             print(
75 |                 "'"
76 |                 + model_name
77 |                 + "' configuration matches the expected "
78 |                 + "auto complete configuration\n"
79 |             )
80 | 
81 |     print("PASS: auto_complete")
82 | 
83 |     sys.exit(0)
84 | 


--------------------------------------------------------------------------------
/examples/bls/async_client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | model_name = "bls_async"
34 | shape = [4]
35 | 
36 | with httpclient.InferenceServerClient("localhost:8000") as client:
37 |     input0_data = np.random.rand(*shape).astype(np.float32)
38 |     input1_data = np.random.rand(*shape).astype(np.float32)
39 |     inputs = [
40 |         httpclient.InferInput(
41 |             "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
42 |         ),
43 |         httpclient.InferInput(
44 |             "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
45 |         ),
46 |     ]
47 | 
48 |     inputs[0].set_data_from_numpy(input0_data)
49 |     inputs[1].set_data_from_numpy(input1_data)
50 | 
51 |     outputs = [
52 |         httpclient.InferRequestedOutput("OUTPUT0"),
53 |         httpclient.InferRequestedOutput("OUTPUT1"),
54 |     ]
55 | 
56 |     response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
57 | 
58 |     result = response.get_response()
59 |     output0_data = response.as_numpy("OUTPUT0")
60 |     output1_data = response.as_numpy("OUTPUT1")
61 | 
62 |     print(
63 |         "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
64 |             input0_data, input1_data, output0_data
65 |         )
66 |     )
67 |     print(
68 |         "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
69 |             input0_data, input1_data, output1_data
70 |         )
71 |     )
72 | 
73 |     if not np.allclose(input0_data + input1_data, output0_data):
74 |         print("BLS async example error: incorrect sum")
75 |         sys.exit(1)
76 | 
77 |     if not np.allclose(input0_data - input1_data, output1_data):
78 |         print("BLS async example error: incorrect difference")
79 |         sys.exit(1)
80 | 
81 |     print("PASS: BLS Async")
82 |     sys.exit(0)
83 | 


--------------------------------------------------------------------------------
/examples/bls/async_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "bls_async"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "INPUT0"
33 |     data_type: TYPE_FP32
34 |     dims: [ 4 ]
35 |   }
36 | ]
37 | input [
38 |   {
39 |     name: "INPUT1"
40 |     data_type: TYPE_FP32
41 |     dims: [ 4 ]
42 |   }
43 | ]
44 | output [
45 |   {
46 |     name: "OUTPUT0"
47 |     data_type: TYPE_FP32
48 |     dims: [ 4 ]
49 |   }
50 | ]
51 | output [
52 |   {
53 |     name: "OUTPUT1"
54 |     data_type: TYPE_FP32
55 |     dims: [ 4 ]
56 |   }
57 | ]
58 | 
59 | instance_group [{ kind: KIND_CPU }]
60 | 


--------------------------------------------------------------------------------
/examples/bls/sync_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "bls_sync"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "MODEL_NAME"
33 |     data_type: TYPE_STRING
34 |     dims: [ 1 ]
35 |   }
36 | ]
37 | input [
38 |   {
39 |     name: "INPUT0"
40 |     data_type: TYPE_FP32
41 |     dims: [ 4 ]
42 |   }
43 | ]
44 | input [
45 |   {
46 |     name: "INPUT1"
47 |     data_type: TYPE_FP32
48 |     dims: [ 4 ]
49 |   }
50 | ]
51 | output [
52 |   {
53 |     name: "OUTPUT0"
54 |     data_type: TYPE_FP32
55 |     dims: [ 4 ]
56 |   }
57 | ]
58 | output [
59 |   {
60 |     name: "OUTPUT1"
61 |     data_type: TYPE_FP32
62 |     dims: [ 4 ]
63 |   }
64 | ]
65 | 
66 | instance_group [{ kind: KIND_CPU }]
67 | 


--------------------------------------------------------------------------------
/examples/bls_decoupled/async_client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | model_name = "bls_decoupled_async"
34 | shape = [1]
35 | 
36 | with httpclient.InferenceServerClient("localhost:8000") as client:
37 |     in_values = [4, 2, 0, 1]
38 | 
39 |     for in_value in in_values:
40 |         input_data = np.array([in_value], dtype=np.int32)
41 |         inputs = [
42 |             httpclient.InferInput(
43 |                 "IN", input_data.shape, np_to_triton_dtype(input_data.dtype)
44 |             )
45 |         ]
46 |         inputs[0].set_data_from_numpy(input_data)
47 |         outputs = [httpclient.InferRequestedOutput("SUM")]
48 | 
49 |         response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
50 | 
51 |         result = response.get_response()
52 |         # output_data contains two times of the square value of the input value.
53 |         output_data = response.as_numpy("SUM")
54 |         print("==========model result==========")
55 |         print(
56 |             "Two times the square value of {} is {}\n".format(input_data, output_data)
57 |         )
58 | 
59 |         if not np.allclose((2 * input_data * input_data), output_data):
60 |             print(
61 |                 "BLS Decoupled Async example error: incorrect output value. Expected {}, got {}.".format(
62 |                     (2 * input_data * input_data), output_data
63 |                 )
64 |             )
65 |             sys.exit(1)
66 | 
67 |     print("PASS: BLS Decoupled Async")
68 |     sys.exit(0)
69 | 


--------------------------------------------------------------------------------
/examples/bls_decoupled/async_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "bls_decoupled_async"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "IN"
33 |     data_type: TYPE_INT32
34 |     dims: [ 1 ]
35 |   }
36 | ]
37 | output [
38 |   {
39 |     name: "SUM"
40 |     data_type: TYPE_INT32
41 |     dims: [ 1 ]
42 |   }
43 | ]
44 | 
45 | instance_group [{ kind: KIND_CPU }]
46 | 


--------------------------------------------------------------------------------
/examples/bls_decoupled/sync_client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | model_name = "bls_decoupled_sync"
34 | shape = [1]
35 | 
36 | with httpclient.InferenceServerClient("localhost:8000") as client:
37 |     in_values = [4, 2, 0, 1]
38 | 
39 |     for in_value in in_values:
40 |         input_data = np.array([in_value], dtype=np.int32)
41 |         inputs = [
42 |             httpclient.InferInput(
43 |                 "IN", input_data.shape, np_to_triton_dtype(input_data.dtype)
44 |             )
45 |         ]
46 |         inputs[0].set_data_from_numpy(input_data)
47 |         outputs = [httpclient.InferRequestedOutput("SUM")]
48 | 
49 |         response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
50 | 
51 |         result = response.get_response()
52 |         output_data = response.as_numpy("SUM")
53 |         print("==========model result==========")
54 |         print("The square value of {} is {}\n".format(input_data, output_data))
55 | 
56 |         if not np.allclose(input_data * input_data, output_data):
57 |             print(
58 |                 "BLS Decoupled Sync example error: incorrect output value. Expected {}, got {}."
59 |             ).format(input_data * input_data, output_data)
60 |             sys.exit(1)
61 | 
62 |     print("PASS: BLS Decoupled Sync")
63 |     sys.exit(0)
64 | 


--------------------------------------------------------------------------------
/examples/bls_decoupled/sync_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "bls_decoupled_sync"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "IN"
33 |     data_type: TYPE_INT32
34 |     dims: [ 1 ]
35 |   }
36 | ]
37 | output [
38 |   {
39 |     name: "SUM"
40 |     data_type: TYPE_INT32
41 |     dims: [ 1 ]
42 |   }
43 | ]
44 | 
45 | instance_group [{ kind: KIND_CPU }]
46 | 


--------------------------------------------------------------------------------
/examples/custom_metrics/README.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | #
 4 | # Redistribution and use in source and binary forms, with or without
 5 | # modification, are permitted provided that the following conditions
 6 | # are met:
 7 | #  * Redistributions of source code must retain the above copyright
 8 | #    notice, this list of conditions and the following disclaimer.
 9 | #  * Redistributions in binary form must reproduce the above copyright
10 | #    notice, this list of conditions and the following disclaimer in the
11 | #    documentation and/or other materials provided with the distribution.
12 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
13 | #    contributors may be used to endorse or promote products derived
14 | #    from this software without specific prior written permission.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | -->
28 | 
29 | # Custom Metrics Example
30 | 
31 | In this section we demonstrate an end-to-end example for the
32 | [Custom Metrics API](../../README.md#custom-metrics) in the Python backend. The
33 | [model repository](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_repository.md)
34 | should contain the [custom_metrics](./model.py) model. The
35 | [custom_metrics](./model.py) model uses the
36 | [Custom Metrics API](../../README.md#custom-metrics) to register and collect
37 | custom metrics.
38 | 
39 | ## Deploying the Custom Metrics Models
40 | 
41 | 1. Create the model repository:
42 | 
43 | ```console
44 | mkdir -p models/custom_metrics/1/
45 | 
46 | # Copy the Python models
47 | cp examples/custom_metrics/model.py models/custom_metrics/1/model.py
48 | cp examples/custom_metrics/config.pbtxt models/custom_metrics/config.pbtxt
49 | ```
50 | 
51 | 2. Start the tritonserver:
52 | 
53 | ```
54 | tritonserver --model-repository `pwd`/models
55 | ```
56 | 
57 | 3. Send inference requests to server:
58 | 
59 | ```
60 | python3 examples/custom_metrics/client.py
61 | ```
62 | 
63 | You should see an output similar to the output below in the client terminal:
64 | 
65 | ```
66 | custom_metrics example: found pattern '# HELP requests_process_latency_ns Cumulative time spent processing requests' in metrics
67 | custom_metrics example: found pattern '# TYPE requests_process_latency_ns counter' in metrics
68 | custom_metrics example: found pattern 'requests_process_latency_ns{model="custom_metrics",version="1"}' in metrics
69 | PASS: custom_metrics
70 | ```
71 | 
72 | In the terminal that runs Triton Server, you should see an output similar to
73 | the output below:
74 | ```
75 | Cumulative requests processing latency: 223406.0
76 | ```
77 | 
78 | The [model.py](./model.py) model file is heavily commented with
79 | explanations about each of the function calls.
80 | 
81 | ### Explanation of the Client Output
82 | 
83 | The [client.py](./client.py) script sends an HTTP request to
84 | `http://localhost:8002/metrics` to fetch the metrics from the Triton server. The
85 | client then verifies that the custom metrics added in the model file are
86 | correctly reported.
87 | 


--------------------------------------------------------------------------------
/examples/custom_metrics/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | # * Redistributions of source code must retain the above copyright
 7 | # notice, this list of conditions and the following disclaimer.
 8 | # * Redistributions in binary form must reproduce the above copyright
 9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of NVIDIA CORPORATION nor the names of its
12 | # contributors may be used to endorse or promote products derived
13 | # from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import requests
31 | import tritonclient.http as httpclient
32 | from tritonclient.utils import *
33 | 
34 | model_name = "custom_metrics"
35 | shape = [4]
36 | 
37 | 
38 | def get_metrics():
39 |     metrics_url = "http://localhost:8002/metrics"
40 |     r = requests.get(metrics_url)
41 |     r.raise_for_status()
42 |     return r.text
43 | 
44 | 
45 | with httpclient.InferenceServerClient("localhost:8000") as client:
46 |     input0_data = np.random.rand(*shape).astype(np.float32)
47 |     input1_data = np.random.rand(*shape).astype(np.float32)
48 |     inputs = [
49 |         httpclient.InferInput(
50 |             "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
51 |         ),
52 |         httpclient.InferInput(
53 |             "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
54 |         ),
55 |     ]
56 | 
57 |     inputs[0].set_data_from_numpy(input0_data)
58 |     inputs[1].set_data_from_numpy(input1_data)
59 | 
60 |     outputs = [
61 |         httpclient.InferRequestedOutput("OUTPUT0"),
62 |         httpclient.InferRequestedOutput("OUTPUT1"),
63 |     ]
64 | 
65 |     response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
66 | 
67 |     output0_data = response.as_numpy("OUTPUT0")
68 |     output1_data = response.as_numpy("OUTPUT1")
69 | 
70 |     if not np.allclose(input0_data + input1_data, output0_data):
71 |         print("custom_metrics example error: incorrect sum")
72 |         sys.exit(1)
73 | 
74 |     if not np.allclose(input0_data - input1_data, output1_data):
75 |         print("custom_metrics example error: incorrect difference")
76 |         sys.exit(1)
77 | 
78 |     metrics = get_metrics()
79 |     patterns = [
80 |         "# HELP requests_process_latency_ns Cumulative time spent processing requests",
81 |         "# TYPE requests_process_latency_ns counter",
82 |         'requests_process_latency_ns{model="custom_metrics",version="1"}',
83 |     ]
84 |     for pattern in patterns:
85 |         if pattern not in metrics:
86 |             print(
87 |                 "custom_metrics example error: missing pattern '{}' in metrics".format(
88 |                     pattern
89 |                 )
90 |             )
91 |             sys.exit(1)
92 |         else:
93 |             print(
94 |                 "custom_metrics example: found pattern '{}' in metrics".format(pattern)
95 |             )
96 | 
97 |     print("PASS: custom_metrics")
98 |     sys.exit(0)
99 | 


--------------------------------------------------------------------------------
/examples/custom_metrics/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "custom_metrics"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "INPUT0"
33 |     data_type: TYPE_FP32
34 |     dims: [ 4 ]
35 |   }
36 | ]
37 | input [
38 |   {
39 |     name: "INPUT1"
40 |     data_type: TYPE_FP32
41 |     dims: [ 4 ]
42 |   }
43 | ]
44 | output [
45 |   {
46 |     name: "OUTPUT0"
47 |     data_type: TYPE_FP32
48 |     dims: [ 4 ]
49 |   }
50 | ]
51 | output [
52 |   {
53 |     name: "OUTPUT1"
54 |     data_type: TYPE_FP32
55 |     dims: [ 4 ]
56 |   }
57 | ]
58 | 
59 | instance_group [
60 |   {
61 |     count: 3
62 |     kind: KIND_CPU
63 |   }
64 | ]
65 | 
66 | 


--------------------------------------------------------------------------------
/examples/decoupled/repeat_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "repeat_int32"
28 | backend: "python"
29 | max_batch_size: 0
30 | model_transaction_policy {
31 |   decoupled: True
32 | }
33 | input [
34 |   {
35 |     name: "IN"
36 |     data_type: TYPE_INT32
37 |     dims: [ -1 ]
38 |   },
39 |   {
40 |     name: "DELAY"
41 |     data_type: TYPE_UINT32
42 |     dims: [ -1 ]
43 |   },
44 |   {
45 |     name: "WAIT"
46 |     data_type: TYPE_UINT32
47 |     dims: [ 1 ]
48 |   }
49 | ]
50 | output [
51 |   {
52 |     name: "OUT"
53 |     data_type: TYPE_INT32
54 |     dims: [ 1 ]
55 |   },
56 |   {
57 |     name: "IDX"
58 |     data_type: TYPE_UINT32
59 |     dims: [ 1 ]
60 |   }
61 | ]
62 | instance_group [{ kind: KIND_CPU }]
63 | 


--------------------------------------------------------------------------------
/examples/decoupled/square_config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "square_int32"
28 | backend: "python"
29 | max_batch_size: 0
30 | model_transaction_policy {
31 |   decoupled: True
32 | }
33 | input [
34 |   {
35 |     name: "IN"
36 |     data_type: TYPE_INT32
37 |     dims: [ 1 ]
38 |   }
39 | ]
40 | output [
41 |   {
42 |     name: "OUT"
43 |     data_type: TYPE_INT32
44 |     dims: [ 1 ]
45 |   }
46 | ]
47 | instance_group [{ kind: KIND_CPU }]
48 | 
49 | 


--------------------------------------------------------------------------------
/examples/instance_kind/client.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # Redistribution and use in source and binary forms, with or without
  4 | # modification, are permitted provided that the following conditions
  5 | # are met:
  6 | #  * Redistributions of source code must retain the above copyright
  7 | #    notice, this list of conditions and the following disclaimer.
  8 | #  * Redistributions in binary form must reproduce the above copyright
  9 | #    notice, this list of conditions and the following disclaimer in the
 10 | #    documentation and/or other materials provided with the distribution.
 11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | #    contributors may be used to endorse or promote products derived
 13 | #    from this software without specific prior written permission.
 14 | #
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | import argparse
 28 | import json
 29 | import sys
 30 | import warnings
 31 | 
 32 | import numpy as np
 33 | import torch
 34 | import tritonclient.http as httpclient
 35 | from tritonclient.utils import *
 36 | 
 37 | warnings.filterwarnings("ignore")
 38 | 
 39 | if __name__ == "__main__":
 40 |     parser = argparse.ArgumentParser()
 41 |     parser.add_argument(
 42 |         "--model_name",
 43 |         type=str,
 44 |         required=False,
 45 |         default="resnet50",
 46 |         help="Model name",
 47 |     )
 48 |     parser.add_argument(
 49 |         "--image_url",
 50 |         type=str,
 51 |         required=False,
 52 |         default="http://images.cocodataset.org/test2017/000000557146.jpg",
 53 |         help="Image URL. Default is:\
 54 |                             http://images.cocodataset.org/test2017/000000557146.jpg",
 55 |     )
 56 |     parser.add_argument(
 57 |         "--url",
 58 |         type=str,
 59 |         required=False,
 60 |         default="localhost:8000",
 61 |         help="Inference server URL. Default is localhost:8000.",
 62 |     )
 63 |     parser.add_argument(
 64 |         "-v",
 65 |         "--verbose",
 66 |         action="store_true",
 67 |         required=False,
 68 |         default=False,
 69 |         help="Enable verbose output",
 70 |     )
 71 |     parser.add_argument(
 72 |         "--label_file",
 73 |         type=str,
 74 |         required=False,
 75 |         default="./resnet50_labels.txt",
 76 |         help="Path to the file with text representation \
 77 |                         of available labels",
 78 |     )
 79 |     args = parser.parse_args()
 80 | 
 81 |     utils = torch.hub.load(
 82 |         "NVIDIA/DeepLearningExamples:torchhub",
 83 |         "nvidia_convnets_processing_utils",
 84 |         skip_validation=True,
 85 |     )
 86 | 
 87 |     try:
 88 |         triton_client = httpclient.InferenceServerClient(args.url)
 89 |     except Exception as e:
 90 |         print("channel creation failed: " + str(e))
 91 |         sys.exit(1)
 92 | 
 93 |     with open(args.label_file) as f:
 94 |         labels_dict = {idx: line.strip() for idx, line in enumerate(f)}
 95 | 
 96 |     if args.verbose:
 97 |         print(json.dumps(triton_client.get_model_config(args.model_name), indent=4))
 98 | 
 99 |     input_name = "INPUT"
100 |     output_name = "OUTPUT"
101 |     batch = np.asarray(utils.prepare_input_from_uri(args.image_url))
102 | 
103 |     input = httpclient.InferInput(input_name, batch.shape, "FP32")
104 |     output = httpclient.InferRequestedOutput(output_name)
105 | 
106 |     input.set_data_from_numpy(batch)
107 |     results = triton_client.infer(
108 |         model_name=args.model_name, inputs=[input], outputs=[output]
109 |     )
110 | 
111 |     output_data = results.as_numpy(output_name)
112 |     max_id = np.argmax(output_data, axis=1)[0]
113 |     print("Results is class: {}".format(labels_dict[max_id]))
114 | 
115 |     print("PASS: ResNet50 instance kind")
116 |     sys.exit(0)
117 | 


--------------------------------------------------------------------------------
/examples/instance_kind/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "resnet50"
28 | backend: "python"
29 | max_batch_size: 128
30 | input {
31 |     name: "INPUT"
32 |     data_type: TYPE_FP32
33 |     format: FORMAT_NCHW
34 |     dims: [ 3, 224, 224 ]
35 |   }
36 | output {
37 |     name: "OUTPUT"
38 |     data_type: TYPE_FP32
39 |     dims: [ 1000 ]
40 |   }
41 | 
42 | instance_group [{ kind: KIND_CPU }]
43 | 


--------------------------------------------------------------------------------
/examples/instance_kind/model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import numpy as np
28 | import torch
29 | import triton_python_backend_utils as pb_utils
30 | from torch.utils.dlpack import to_dlpack
31 | 
32 | 
33 | class TritonPythonModel:
34 |     def initialize(self, args):
35 |         """
36 |         This function initializes pre-trained ResNet50 model,
37 |         depending on the value specified by an `instance_group` parameter
38 |         in `config.pbtxt`.
39 | 
40 |         Depending on what `instance_group` was specified in
41 |         the config.pbtxt file (KIND_CPU or KIND_GPU), the model instance
42 |         will be initialised on a cpu, a gpu, or both. If `instance_group` was
43 |         not specified in the config file, then models will be loaded onto
44 |         the default device of the framework.
45 |         """
46 |         # Here we set up the device onto which our model will beloaded,
47 |         # based on specified `model_instance_kind` and `model_instance_device_id`
48 |         # fields.
49 |         device = "cuda" if args["model_instance_kind"] == "GPU" else "cpu"
50 |         device_id = args["model_instance_device_id"]
51 |         self.device = f"{device}:{device_id}"
52 |         # This example is configured to work with torch=1.13
53 |         # and torchvision=0.14. Thus, we need to provide a proper tag `0.14.1`
54 |         # to make sure loaded Resnet50 is compatible with
55 |         # installed `torchvision`.
56 |         # Refer to README for installation instructions.
57 |         self.model = (
58 |             torch.hub.load(
59 |                 "pytorch/vision:v0.14.1",
60 |                 "resnet50",
61 |                 weights="IMAGENET1K_V2",
62 |                 skip_validation=True,
63 |             )
64 |             .to(self.device)
65 |             .eval()
66 |         )
67 | 
68 |     def execute(self, requests):
69 |         """
70 |         This function receives a list of requests (`pb_utils.InferenceRequest`),
71 |         performs inference on every request and appends it to responses.
72 |         """
73 |         responses = []
74 |         for request in requests:
75 |             input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT")
76 |             with torch.no_grad():
77 |                 result = self.model(
78 |                     torch.as_tensor(input_tensor.as_numpy(), device=self.device)
79 |                 )
80 |             out_tensor = pb_utils.Tensor.from_dlpack("OUTPUT", to_dlpack(result))
81 |             responses.append(pb_utils.InferenceResponse([out_tensor]))
82 |         return responses
83 | 


--------------------------------------------------------------------------------
/examples/jax/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
33 | model_name = "jax"
34 | shape = [4]
35 | 
36 | with httpclient.InferenceServerClient("localhost:8000") as client:
37 |     input0_data = np.random.rand(*shape).astype(np.float32)
38 |     input1_data = np.random.rand(*shape).astype(np.float32)
39 |     inputs = [
40 |         httpclient.InferInput(
41 |             "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
42 |         ),
43 |         httpclient.InferInput(
44 |             "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
45 |         ),
46 |     ]
47 | 
48 |     inputs[0].set_data_from_numpy(input0_data)
49 |     inputs[1].set_data_from_numpy(input1_data)
50 | 
51 |     outputs = [
52 |         httpclient.InferRequestedOutput("OUTPUT0"),
53 |         httpclient.InferRequestedOutput("OUTPUT1"),
54 |     ]
55 | 
56 |     response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
57 | 
58 |     result = response.get_response()
59 |     output0_data = response.as_numpy("OUTPUT0")
60 |     output1_data = response.as_numpy("OUTPUT1")
61 | 
62 |     print(
63 |         "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
64 |             input0_data, input1_data, output0_data
65 |         )
66 |     )
67 |     print(
68 |         "INPUT0 ({}) - INPUT1 ({}) = OUTPUT0 ({})".format(
69 |             input0_data, input1_data, output1_data
70 |         )
71 |     )
72 | 
73 |     if not np.allclose(input0_data + input1_data, output0_data):
74 |         print("jax example error: incorrect sum")
75 |         sys.exit(1)
76 | 
77 |     if not np.allclose(input0_data - input1_data, output1_data):
78 |         print("jax example error: incorrect difference")
79 |         sys.exit(1)
80 | 
81 |     print("PASS: jax")
82 |     sys.exit(0)
83 | 


--------------------------------------------------------------------------------
/examples/jax/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | name: "jax"
28 | backend: "python"
29 | 
30 | input [
31 |   {
32 |     name: "INPUT0"
33 |     data_type: TYPE_FP32
34 |     dims: [ 4 ]
35 |   }
36 | ]
37 | input [
38 |   {
39 |     name: "INPUT1"
40 |     data_type: TYPE_FP32
41 |     dims: [ 4 ]
42 |   }
43 | ]
44 | output [
45 |   {
46 |     name: "OUTPUT0"
47 |     data_type: TYPE_FP32
48 |     dims: [ 4 ]
49 |   }
50 | ]
51 | output [
52 |   {
53 |     name: "OUTPUT1"
54 |     data_type: TYPE_FP32
55 |     dims: [ 4 ]
56 |   }
57 | ]
58 | 
59 | instance_group [{ kind: KIND_CPU }]
60 | 


--------------------------------------------------------------------------------
/examples/preprocessing/README.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | # Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | #
 4 | # Redistribution and use in source and binary forms, with or without
 5 | # modification, are permitted provided that the following conditions
 6 | # are met:
 7 | #  * Redistributions of source code must retain the above copyright
 8 | #    notice, this list of conditions and the following disclaimer.
 9 | #  * Redistributions in binary form must reproduce the above copyright
10 | #    notice, this list of conditions and the following disclaimer in the
11 | #    documentation and/or other materials provided with the distribution.
12 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
13 | #    contributors may be used to endorse or promote products derived
14 | #    from this software without specific prior written permission.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | -->
28 | 
29 | # Preprocessing Using Python Backend Example
30 | This example shows how to preprocess your inputs using the Python backend before they are passed to the TensorRT model for inference. The ensemble model includes an image preprocessing model (preprocess) and a TensorRT model (resnet50_trt) to do inference.
31 | 
32 | **1. Converting PyTorch Model to ONNX format:**
33 | 
34 | Run onnx_exporter.py to convert the ResNet50 PyTorch model to ONNX format. The width and height dims are fixed at 224, but the dynamic axes arguments are used to allow dynamic batching. The commands in steps 2 and 3 must be executed inside this Docker container.
35 | 
36 |     docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/pytorch:xx.yy-py3 bash
37 |     pip install numpy pillow torchvision
38 |     python onnx_exporter.py --save model.onnx
39 | 
40 | **2. Create the model repository:**
41 | 
42 |     mkdir -p model_repository/ensemble_python_resnet50/1
43 |     mkdir -p model_repository/preprocess/1
44 |     mkdir -p model_repository/resnet50_trt/1
45 | 
46 |     # Copy the Python model
47 |     cp model.py model_repository/preprocess/1
48 | 
49 | **3. Build a TensorRT engine for the ONNX model**
50 | 
51 | Pass --fp16 to enable fp16 precision. To enable dynamic shapes, use --minShapes, --optShapes, and --maxShapes together with --explicitBatch:
52 | 
53 |     trtexec --onnx=model.onnx --saveEngine=./model_repository/resnet50_trt/1/model.plan --explicitBatch --minShapes=input:1x3x224x224 --optShapes=input:1x3x224x224 --maxShapes=input:256x3x224x224 --fp16
54 | 
55 | **4. Run the command below to start the server container:**
56 | 
57 | Under python_backend/examples/preprocessing, run this command to start the server docker container:
58 | 
59 |     docker run --gpus=all -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd):/workspace/ -v/$(pwd)/model_repository:/models nvcr.io/nvidia/tritonserver:xx.yy-py3 bash
60 |     pip install numpy pillow torchvision
61 |     tritonserver --model-repository=/models
62 | 
63 | **5. Start the client to test:**
64 | 
65 | Under python_backend/examples/preprocessing, run the commands below to start the client Docker container:
66 | 
67 |     wget https://raw.githubusercontent.com/triton-inference-server/server/main/qa/images/mug.jpg -O "mug.jpg"
68 |     docker run --rm --net=host -v $(pwd):/workspace/ nvcr.io/nvidia/tritonserver:xx.yy-py3-sdk python client.py --image mug.jpg
69 |     Result is class: COFFEE MUG
70 | 
71 | Here, we input an image of a mug and the inference result is "COFFEE MUG", which is correct.
72 | 


--------------------------------------------------------------------------------
/examples/preprocessing/client.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | #
  3 | # Redistribution and use in source and binary forms, with or without
  4 | # modification, are permitted provided that the following conditions
  5 | # are met:
  6 | #  * Redistributions of source code must retain the above copyright
  7 | #    notice, this list of conditions and the following disclaimer.
  8 | #  * Redistributions in binary form must reproduce the above copyright
  9 | #    notice, this list of conditions and the following disclaimer in the
 10 | #    documentation and/or other materials provided with the distribution.
 11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | #    contributors may be used to endorse or promote products derived
 13 | #    from this software without specific prior written permission.
 14 | #
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | import argparse
 28 | import json
 29 | import sys
 30 | 
 31 | import numpy as np
 32 | import tritonclient.grpc as tritongrpcclient
 33 | 
 34 | 
 35 | def load_image(img_path: str):
 36 |     """
 37 |     Loads an encoded image as an array of bytes.
 38 | 
 39 |     """
 40 |     return np.fromfile(img_path, dtype="uint8")
 41 | 
 42 | 
 43 | if __name__ == "__main__":
 44 |     parser = argparse.ArgumentParser()
 45 |     parser.add_argument(
 46 |         "--model_name",
 47 |         type=str,
 48 |         required=False,
 49 |         default="ensemble_python_resnet50",
 50 |         help="Model name",
 51 |     )
 52 |     parser.add_argument("--image", type=str, required=True, help="Path to the image")
 53 |     parser.add_argument(
 54 |         "--url",
 55 |         type=str,
 56 |         required=False,
 57 |         default="localhost:8001",
 58 |         help="Inference server URL. Default is localhost:8001.",
 59 |     )
 60 |     parser.add_argument(
 61 |         "-v",
 62 |         "--verbose",
 63 |         action="store_true",
 64 |         required=False,
 65 |         default=False,
 66 |         help="Enable verbose output",
 67 |     )
 68 |     parser.add_argument(
 69 |         "--label_file",
 70 |         type=str,
 71 |         default="./model_repository/resnet50_trt/labels.txt",
 72 |         help="Path to the file with text representation of available labels",
 73 |     )
 74 |     args = parser.parse_args()
 75 | 
 76 |     try:
 77 |         triton_client = tritongrpcclient.InferenceServerClient(
 78 |             url=args.url, verbose=args.verbose
 79 |         )
 80 |     except Exception as e:
 81 |         print("channel creation failed: " + str(e))
 82 |         sys.exit(1)
 83 | 
 84 |     with open(args.label_file) as f:
 85 |         labels_dict = {idx: line.strip() for idx, line in enumerate(f)}
 86 | 
 87 |     inputs = []
 88 |     outputs = []
 89 |     input_name = "INPUT"
 90 |     output_name = "OUTPUT"
 91 |     image_data = load_image(args.image)
 92 |     image_data = np.expand_dims(image_data, axis=0)
 93 | 
 94 |     inputs.append(tritongrpcclient.InferInput(input_name, image_data.shape, "UINT8"))
 95 |     outputs.append(tritongrpcclient.InferRequestedOutput(output_name))
 96 | 
 97 |     inputs[0].set_data_from_numpy(image_data)
 98 |     results = triton_client.infer(
 99 |         model_name=args.model_name, inputs=inputs, outputs=outputs
100 |     )
101 | 
102 |     output0_data = results.as_numpy(output_name)
103 |     print(output0_data)
104 |     maxs = np.argmax(output0_data, axis=1)
105 |     print(maxs)
106 |     print("Result is class: {}".format(labels_dict[maxs[0]]))
107 | 


--------------------------------------------------------------------------------
/examples/preprocessing/model_repository/ensemble_python_resnet50/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
# Ensemble model that chains two models: the Python "preprocess" model
# followed by the TensorRT "resnet50_trt" model.
name: "ensemble_python_resnet50"
platform: "ensemble"
max_batch_size: 256
# Ensemble input: a variable-length (-1) vector of UINT8 values per request.
input [
  {
    name: "INPUT"
    data_type: TYPE_UINT8
    dims: [ -1 ]
  }
]
# Ensemble output: 1000 FP32 values per request.
output [
  {
    name: "OUTPUT"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
]
ensemble_scheduling {
  step [
    {
      # Step 1: the ensemble tensor "INPUT" is bound to the preprocess
      # model's "INPUT_0"; its "OUTPUT_0" is published as the intermediate
      # ensemble tensor "preprocessed_image".
      model_name: "preprocess"
      # -1 requests the latest available version (see the Triton ensemble
      # model documentation).
      model_version: -1
      input_map {
        key: "INPUT_0"
        value: "INPUT"
      }
      output_map {
        key: "OUTPUT_0"
        value: "preprocessed_image"
      }
    },
    {
      # Step 2: "preprocessed_image" is bound to the TensorRT model's
      # "input"; its "output" becomes the ensemble tensor "OUTPUT".
      model_name: "resnet50_trt"
      model_version: -1
      input_map {
        key: "input"
        value: "preprocessed_image"
      }
      output_map {
        key: "output"
        value: "OUTPUT"
      }
    }
  ]
}
72 | 


--------------------------------------------------------------------------------
/examples/preprocessing/model_repository/preprocess/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
# Python-backend model used as the first step of the
# "ensemble_python_resnet50" ensemble.
name: "preprocess"
backend: "python"
max_batch_size: 256
# Input: a variable-length (-1) vector of UINT8 values per request.
input [
{
    name: "INPUT_0"
    data_type: TYPE_UINT8
    dims: [ -1 ]
}
]

# Output: an FP32 tensor of shape 3 x 224 x 224 per request.
output [
{
    name: "OUTPUT_0"
    data_type: TYPE_FP32
    dims: [ 3, 224, 224 ]
}
]

# Model instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]
47 | 
48 | 


--------------------------------------------------------------------------------
/examples/preprocessing/model_repository/resnet50_trt/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
# TensorRT plan model used as the second step of the
# "ensemble_python_resnet50" ensemble.
name: "resnet50_trt"
platform: "tensorrt_plan"
max_batch_size: 256
# Input: FP32 tensor with 3 leading channels and two variable (-1) trailing
# dimensions. The tensor name matches the "input" name used by
# onnx_exporter.py.
input [
{
    name: "input"
    data_type: TYPE_FP32
    dims: [3, -1, -1 ]

}
]
# Output: 1000 FP32 values per request; labels.txt supplies the text label
# associated with each output index.
output[
{
    name: "output"
    data_type: TYPE_FP32
    dims: [ 1000 ]
    label_filename: "labels.txt"
}
]
46 | 


--------------------------------------------------------------------------------
/examples/preprocessing/onnx_exporter.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import argparse
28 | import os
29 | 
30 | import torch
31 | import torchvision.models as models
32 | 
if __name__ == "__main__":
    # The only option is the destination path of the exported ONNX file.
    cli = argparse.ArgumentParser()
    cli.add_argument("--save", default="model.onnx")
    args = cli.parse_args()

    # Pretrained ResNet50, switched to inference mode before export.
    resnet50 = models.resnet50(pretrained=True).eval()

    # Example input used for the export: one 3x224x224 image.
    dummy_input = torch.randn(1, 3, 224, 224)

    # Axes declared dynamic in the exported graph: batch, height and width
    # for the input, batch only for the output.
    dynamic_axes = {
        "input": {0: "batch_size", 2: "height", 3: "width"},
        "output": {0: "batch_size"},
    }
    export_options = dict(
        export_params=True,
        opset_version=10,
        do_constant_folding=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes=dynamic_axes,
    )
    torch.onnx.export(resnet50, dummy_input, args.save, **export_options)

    print("Saved {}".format(args.save))
58 | 


--------------------------------------------------------------------------------
/examples/pytorch/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | import sys
28 | 
29 | import numpy as np
30 | import tritonclient.http as httpclient
31 | from tritonclient.utils import *
32 | 
# Client for the "pytorch" example model: sends two random FP32 vectors over
# HTTP and checks that OUTPUT0 is their sum and OUTPUT1 their difference.
model_name = "pytorch"
shape = [4]

with httpclient.InferenceServerClient("localhost:8000") as client:
    # Two random float32 vectors of length 4 serve as the request inputs.
    input0_data = np.random.rand(*shape).astype(np.float32)
    input1_data = np.random.rand(*shape).astype(np.float32)
    inputs = [
        httpclient.InferInput(
            "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
        ),
        httpclient.InferInput(
            "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
        ),
    ]

    inputs[0].set_data_from_numpy(input0_data)
    inputs[1].set_data_from_numpy(input1_data)

    outputs = [
        httpclient.InferRequestedOutput("OUTPUT0"),
        httpclient.InferRequestedOutput("OUTPUT1"),
    ]

    response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)

    result = response.get_response()
    output0_data = response.as_numpy("OUTPUT0")
    output1_data = response.as_numpy("OUTPUT1")

    print(
        "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data
        )
    )
    # The difference is returned in OUTPUT1, so label it as such (the message
    # previously said OUTPUT0 while printing output1_data).
    print(
        "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data
        )
    )

    # Exit with a non-zero status if either result differs from the value
    # computed locally.
    if not np.allclose(input0_data + input1_data, output0_data):
        print("pytorch example error: incorrect sum")
        sys.exit(1)

    if not np.allclose(input0_data - input1_data, output1_data):
        print("pytorch example error: incorrect difference")
        sys.exit(1)

    print("PASS: pytorch")
    sys.exit(0)
83 | 


--------------------------------------------------------------------------------
/examples/pytorch/config.pbtxt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
# Python-backend model for the "pytorch" example. The accompanying client
# sends INPUT0/INPUT1 and expects their sum in OUTPUT0 and their difference
# in OUTPUT1.
name: "pytorch"
backend: "python"

# Two FP32 inputs of 4 elements each, declared in separate `input` blocks.
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
input [
  {
    name: "INPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
# Two FP32 outputs of 4 elements each.
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]
output [
  {
    name: "OUTPUT1"
    data_type: TYPE_FP32
    dims: [ 4 ]
  }
]

# Model instances are placed on the CPU.
instance_group [{ kind: KIND_CPU }]
60 | 


--------------------------------------------------------------------------------
/inferentia/qa/Dockerfile.QA:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #
28 | # Multistage build.
29 | #
# Build arguments. ARGs declared before the first FROM are only usable in
# FROM lines; any that are needed inside a stage must be redeclared there.
ARG BASE_IMAGE=tritonserver
ARG BUILD_IMAGE=tritonserver_build
ARG SDK_IMAGE=tritonserver_sdk
ARG TRITON_PATH=/home/ubuntu

# Named source stages so that COPY --from honors the SDK_IMAGE / BUILD_IMAGE
# build arguments (COPY --from itself does not expand variables).
FROM ${SDK_IMAGE} AS sdk
FROM ${BUILD_IMAGE} AS build
FROM ${BASE_IMAGE}
# Bring the globally declared TRITON_PATH (and its default) into this stage;
# without this the ENV instruction below would expand it to an empty string.
ARG TRITON_PATH
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# install platform specific packages
RUN if [ $(cat /etc/os-release | grep 'VERSION_ID="20.04"' | wc -l) -ne 0 ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
                libpng-dev; \
    elif [ $(cat /etc/os-release | grep 'VERSION_ID="18.04"' | wc -l) -ne 0 ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
                libpng-dev; \
    else \
        echo "Ubuntu version must be either 18.04 or 20.04" && \
        exit 1; \
    fi

RUN apt-get update && apt-get install -y --no-install-recommends \
                              python3-dev \
                              python3-pip \
                              build-essential \
                              wget && \
    rm -rf /var/lib/apt/lists/*

# Make `python` resolve to python3.
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python

RUN pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade numpy pillow attrdict future grpcio requests gsutil awscli six grpcio-channelz

WORKDIR /opt/tritonserver
# Copy the entire qa repo to the /opt/tritonserver/qa repo
COPY --from=build /workspace/qa qa
# Stage the SDK install tree, pick out the client binaries, libraries,
# Python scripts/wheels and Java example jars, then drop the staging copy.
COPY --chown=1000:1000 --from=sdk /workspace/install client_tmp
RUN mkdir -p qa/clients && mkdir -p qa/pkgs && \
    cp -a client_tmp/bin/* qa/clients/. && \
    cp client_tmp/lib/libgrpcclient.so qa/clients/. && \
    cp client_tmp/lib/libhttpclient.so qa/clients/. && \
    cp client_tmp/python/*.py qa/clients/. && \
    cp client_tmp/python/triton*.whl qa/pkgs/. && \
    cp client_tmp/java/examples/*.jar qa/clients/. && \
    rm -rf client_tmp
# Create mount paths for lib
RUN mkdir /mylib && mkdir /home/ubuntu

ENV TRITON_PATH=${TRITON_PATH}
ENV LD_LIBRARY_PATH=/opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
83 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
# Spell-checker (codespell) configuration.
[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
skip = "./.git,./.github"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3

# Import-sorter (isort) configuration, based on its "black" profile.
[tool.isort]
profile = "black"
# Wrap long import lists in parentheses, with a trailing comma.
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
# Maximum line length and four-space indentation for wrapped imports.
line_length = 88
balanced_wrapping = true
indent = "    "
# Paths excluded from sorting.
skip = ["build"]
49 | 


--------------------------------------------------------------------------------
/src/correlation_id.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "correlation_id.h"
 28 | 
 29 | namespace triton { namespace backend { namespace python {
 30 | 
 31 | CorrelationId::CorrelationId()
 32 |     : id_string_(""), id_uint_(0), id_type_(CorrelationIdDataType::UINT64)
 33 | {
 34 | }
 35 | 
 36 | CorrelationId::CorrelationId(const std::string& id_string)
 37 |     : id_string_(id_string), id_uint_(0),
 38 |       id_type_(CorrelationIdDataType::STRING)
 39 | {
 40 | }
 41 | 
 42 | CorrelationId::CorrelationId(uint64_t id_uint)
 43 |     : id_string_(""), id_uint_(id_uint), id_type_(CorrelationIdDataType::UINT64)
 44 | {
 45 | }
 46 | // Copies the id value only; no shared-memory member is taken from rhs.
 47 | CorrelationId::CorrelationId(const CorrelationId& rhs)
 48 |     : id_string_(rhs.id_string_),
 49 |       id_uint_(rhs.id_uint_),
 50 |       id_type_(rhs.id_type_)
 51 | {
 52 | }
 53 | // Copies the id value out of a heap-held CorrelationId (must be non-null).
 54 | CorrelationId::CorrelationId(std::unique_ptr<CorrelationId>& correlation_id_shm)
 55 |     : id_string_(correlation_id_shm->id_string_),
 56 |       id_uint_(correlation_id_shm->id_uint_),
 57 |       id_type_(correlation_id_shm->id_type_)
 58 | {
 59 | }
 60 | // Copy-assigns the id value; shared-memory members of *this are untouched.
 61 | CorrelationId&
 62 | CorrelationId::operator=(const CorrelationId& rhs)
 63 | {
 64 |   id_string_ = rhs.id_string_;
 65 |   id_type_ = rhs.id_type_;
 66 |   id_uint_ = rhs.id_uint_;
 67 |   return *this;
 68 | }
 69 | // Serializes this id into shared memory and retains the allocations.
 70 | void
 71 | CorrelationId::SaveToSharedMemory(
 72 |     std::unique_ptr<SharedMemoryManager>& shm_pool)
 73 | {
 74 |   // Allocate first so a throw from PbString::Create leaves members untouched.
 75 |   AllocatedSharedMemory<CorrelationIdShm> correlation_id_shm =
 76 |       shm_pool->Construct<CorrelationIdShm>();
 77 |   std::unique_ptr<PbString> id_string_shm =
 78 |       PbString::Create(shm_pool, id_string_);
 79 |   CorrelationIdShm* shm_record = correlation_id_shm.data_.get();
 80 |   shm_record->id_uint = id_uint_;
 81 |   shm_record->id_string_shm_handle = id_string_shm->ShmHandle();
 82 |   shm_record->id_type = id_type_;
 83 | 
 84 |   // Commit: retain the allocations and expose the record and its handle.
 85 |   correlation_id_shm_ = std::move(correlation_id_shm);
 86 |   id_string_shm_ = std::move(id_string_shm);
 87 |   correlation_id_shm_ptr_ = shm_record;
 88 |   shm_handle_ = correlation_id_shm_.handle_;
 89 | }
 90 | // Rebuilds a CorrelationId from the record stored at `handle`.
 91 | std::unique_ptr<CorrelationId>
 92 | CorrelationId::LoadFromSharedMemory(
 93 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
 94 |     bi::managed_external_buffer::handle_t handle)
 95 | {
 96 |   AllocatedSharedMemory<CorrelationIdShm> record =
 97 |       shm_pool->Load<CorrelationIdShm>(handle);
 98 | 
 99 |   // The record carries the handle of the string payload; load that too.
100 |   std::unique_ptr<PbString> string_payload = PbString::LoadFromSharedMemory(
101 |       shm_pool, record.data_->id_string_shm_handle);
102 |   std::unique_ptr<CorrelationId> loaded(
103 |       new CorrelationId(record, string_payload));
104 |   return loaded;
105 | }
106 | // Takes ownership of loaded shared memory and mirrors its values locally.
107 | CorrelationId::CorrelationId(
108 |     AllocatedSharedMemory<CorrelationIdShm>& correlation_id_shm,
109 |     std::unique_ptr<PbString>& id_string_shm)
110 |     : correlation_id_shm_(std::move(correlation_id_shm)),
111 |       id_string_shm_(std::move(id_string_shm))
112 | {
113 |   shm_handle_ = correlation_id_shm_.handle_;
114 |   correlation_id_shm_ptr_ = correlation_id_shm_.data_.get();
115 |   id_type_ = correlation_id_shm_ptr_->id_type;
116 |   id_uint_ = correlation_id_shm_ptr_->id_uint;
117 |   id_string_ = id_string_shm_->String();
118 | }
119 | 
120 | }}};  // namespace triton::backend::python
121 | 


--------------------------------------------------------------------------------
/src/correlation_id.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <string>
30 | 
31 | #include "pb_string.h"
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | // Tag identifying which value of a CorrelationId is the meaningful one.
36 | enum class CorrelationIdDataType { UINT64, STRING };
37 | 
38 | struct CorrelationIdShm {  // Record filled in by SaveToSharedMemory().
39 |   bi::managed_external_buffer::handle_t id_string_shm_handle;
40 |   uint64_t id_uint;               // Copy of CorrelationId::id_uint_.
41 |   CorrelationIdDataType id_type;  // Copy of CorrelationId::id_type_.
42 | };
43 | 
44 | class CorrelationId {
45 |  public:
46 |   CorrelationId();
47 |   CorrelationId(const std::string& id_string);
48 |   CorrelationId(uint64_t id_uint);
49 |   CorrelationId(const CorrelationId& rhs);
50 |   CorrelationId(std::unique_ptr<CorrelationId>& correlation_id_shm);
51 |   CorrelationId& operator=(const CorrelationId& rhs);
52 | 
53 |   /// Save CorrelationId object to shared memory.
54 |   /// \param shm_pool Shared memory pool to save the CorrelationId object.
55 |   void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
56 | 
57 |   /// Create a CorrelationId object from shared memory.
58 |   /// \param shm_pool Shared memory pool
59 |   /// \param handle Shared memory handle of the CorrelationId.
60 |   /// \return Returns the CorrelationId stored at the specified handle
61 |   /// location.
62 |   static std::unique_ptr<CorrelationId> LoadFromSharedMemory(
63 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
64 |       bi::managed_external_buffer::handle_t handle);
65 | 
66 |   // Tells which of the two value accessors below holds the id.
67 |   CorrelationIdDataType Type() const { return id_type_; }
68 | 
69 |   // Value accessors; use the one that matches Type().
70 |   const std::string& StringValue() const { return id_string_; }
71 |   uint64_t UnsignedIntValue() const { return id_uint_; }
72 |   // Handle of the shared-memory record; 0 while no record is attached.
73 |   bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
74 | 
75 |  private:
76 |   // The private constructor for creating a CorrelationId object from shared
77 |   // memory.
78 |   CorrelationId(
79 |       AllocatedSharedMemory<CorrelationIdShm>& correlation_id_shm,
80 |       std::unique_ptr<PbString>& id_string_shm);
81 | 
82 |   std::string id_string_;
83 |   uint64_t id_uint_;
84 |   CorrelationIdDataType id_type_;
85 | 
86 |   // Shared memory state; the pointer and handle default to null / 0.
87 |   AllocatedSharedMemory<CorrelationIdShm> correlation_id_shm_;
88 |   CorrelationIdShm* correlation_id_shm_ptr_ = nullptr;
89 |   bi::managed_external_buffer::handle_t shm_handle_ = 0;
90 |   std::unique_ptr<PbString> id_string_shm_;
91 | };
92 | 
93 | }}};  // namespace triton::backend::python
94 | 


--------------------------------------------------------------------------------
/src/gpu_buffers.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include "gpu_buffers.h"
28 | 
29 | #include "pb_string.h"
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | GPUBuffersHelper::GPUBuffersHelper() : completed_(false)
33 | {
34 |   // Starts with no buffers queued and Complete() not yet called.
35 | }
36 | // Queues a buffer handle; rejected once Complete() has been called.
37 | void
38 | GPUBuffersHelper::AddBuffer(const bi::managed_external_buffer::handle_t& handle)
39 | {
40 |   if (!completed_) {
41 |     buffers_.push_back(handle);
42 |     return;
43 |   }
44 |   throw PythonBackendException(
45 |       "It is not possible to add buffers after 'Complete' has been called on "
46 |       "a GPUBuffersHelper.");
47 | }
48 | // Stores `error` in shared memory; Complete() then reports failure.
49 | void
50 | GPUBuffersHelper::SetError(
51 |     std::unique_ptr<SharedMemoryManager>& shm_pool, const std::string& error)
52 | {
53 |   error_shm_ = PbString::Create(shm_pool, error);
54 | }
55 | 
56 | void
57 | GPUBuffersHelper::Complete(std::unique_ptr<SharedMemoryManager>& shm_pool)
58 | {
59 |   if (completed_) {
60 |     throw PythonBackendException(
61 |         "Complete has already been called. Complete should only be called "
62 |         "once.");
63 |   }
64 |   gpu_buffers_shm_ = shm_pool->Construct<GPUBuffersShm>();
65 |   if (!error_shm_) {
66 |     buffers_handle_shm_ =
67 |         shm_pool->Construct<bi::managed_external_buffer::handle_t>(
68 |             buffers_.size());
69 |     gpu_buffers_shm_.data_->buffer_count = buffers_.size();
70 |     gpu_buffers_shm_.data_->success = true;
71 |     gpu_buffers_shm_.data_->buffers = buffers_handle_shm_.handle_;
72 |     for (size_t i = 0; i < buffers_.size(); ++i) {
73 |       buffers_handle_shm_.data_.get()[i] = buffers_[i];
74 |     }
75 |   } else {
76 |     gpu_buffers_shm_.data_->success = false;
77 |     gpu_buffers_shm_.data_->error = error_shm_->ShmHandle();
78 |   }
79 |   completed_ = true;
80 | }
81 | 
82 | // Handle of the GPUBuffersShm record that Complete() allocates.
83 | bi::managed_external_buffer::handle_t
84 | GPUBuffersHelper::ShmHandle()
85 | {
86 |   return gpu_buffers_shm_.handle_;
87 | }
88 | 
89 | }}}  // namespace triton::backend::python
90 | 


--------------------------------------------------------------------------------
/src/gpu_buffers.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include "pb_string.h"
30 | #include "pb_utils.h"
31 | #include "scoped_defer.h"
32 | 
33 | namespace triton { namespace backend { namespace python {
34 | 
35 | /// Shared-memory record filled in by GPUBuffersHelper::Complete().
36 | /// \param success whether fetching the GPU buffers was successful.
37 | /// \param error handle of the error message; set when success is false.
38 | /// \param buffers handle of the array holding the buffer handles.
39 | /// \param buffer_count the number of entries in that array.
40 | struct GPUBuffersShm {
41 |   bool success;
42 |   bi::managed_external_buffer::handle_t error;
43 |   bi::managed_external_buffer::handle_t buffers;
44 |   uint32_t buffer_count;
45 | };
46 | 
47 | /// Helper class to facilitate the transfer of metadata associated with
48 | /// the GPU buffers in shared memory.
49 | class GPUBuffersHelper {
50 |  public:
51 |   GPUBuffersHelper();  // Starts with Complete() not yet called.
52 |   void AddBuffer(const bi::managed_external_buffer::handle_t& handle);
53 |   void Complete(std::unique_ptr<SharedMemoryManager>& shm_pool);
54 |   void SetError(
55 |       std::unique_ptr<SharedMemoryManager>& shm_pool, const std::string& error);
56 |   bi::managed_external_buffer::handle_t ShmHandle();
57 | 
58 |  private:
59 |   AllocatedSharedMemory<GPUBuffersShm> gpu_buffers_shm_;
60 |   std::vector<bi::managed_external_buffer::handle_t> buffers_;
61 |   AllocatedSharedMemory<bi::managed_external_buffer::handle_t>
62 |       buffers_handle_shm_;
63 |   std::unique_ptr<PbString> error_shm_;  // Non-null once SetError() ran.
64 |   bool completed_;  // Set by Complete(); AddBuffer() throws afterwards.
65 | };
66 | 
67 | }}};  // namespace triton::backend::python
68 | 


--------------------------------------------------------------------------------
/src/infer_payload.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "infer_payload.h"
 28 | 
 29 | namespace triton { namespace backend { namespace python {
 30 | // Creates the payload with an unset promise and no request address.
 31 | InferPayload::InferPayload(
 32 |     const bool is_decoupled,
 33 |     std::function<void(std::unique_ptr<InferResponse>)> callback)
 34 |     : promise_(new std::promise<std::unique_ptr<InferResponse>>()),
 35 |       is_decoupled_(is_decoupled), is_promise_set_(false),
 36 |       callback_(std::move(callback)), request_address_(0)
 37 | {
 38 | }
 39 | // The first response fulfils the promise; later ones go to the callback.
 40 | void
 41 | InferPayload::SetValue(std::unique_ptr<InferResponse> infer_response)
 42 | {
 43 |   std::unique_lock<std::mutex> guard(mutex_);
 44 |   const bool is_first_response = !is_promise_set_;
 45 |   if (is_first_response) {
 46 |     is_promise_set_ = true;
 47 |     promise_->set_value(std::move(infer_response));
 48 |   }
 49 |   guard.unlock();
 50 |   // The callback is invoked only after the lock has been released.
 51 |   if (!is_first_response) {
 52 |     Callback(std::move(infer_response));
 53 |   }
 54 | }
 55 | // Hands the promise's future to the caller through `response_future`.
 56 | void
 57 | InferPayload::SetFuture(
 58 |     std::future<std::unique_ptr<InferResponse>>& response_future)
 59 | {
 60 |   response_future = promise_->get_future();  // One-shot per std::promise.
 61 | }
 62 | // Returns the decoupled flag that was supplied at construction.
 63 | bool
 64 | InferPayload::IsDecoupled()
 65 | {
 66 |   return is_decoupled_;
 67 | }
 68 | // NOTE(review): reads is_promise_set_ without mutex_, unlike SetValue().
 69 | bool
 70 | InferPayload::IsPromiseSet()
 71 | {
 72 |   return is_promise_set_;
 73 | }
 74 | // Forwards a response to the callback supplied at construction.
 75 | void
 76 | InferPayload::Callback(std::unique_ptr<InferResponse> infer_response)
 77 | {
 78 |   callback_(std::move(infer_response));
 79 | }
 80 | // Stores a shared, heap-allocated copy of `response_alloc_userp`.
 81 | void
 82 | InferPayload::SetResponseAllocUserp(
 83 |     const ResponseAllocatorUserp& response_alloc_userp)
 84 | {
 85 |   response_alloc_userp_ =
 86 |       std::make_shared<ResponseAllocatorUserp>(response_alloc_userp);
 87 | }
 88 | // Returns the stored copy; null if SetResponseAllocUserp() never ran.
 89 | std::shared_ptr<ResponseAllocatorUserp>
 90 | InferPayload::ResponseAllocUserp()
 91 | {
 92 |   return response_alloc_userp_;
 93 | }
 94 | // Records the request address while holding request_address_mutex_.
 95 | void
 96 | InferPayload::SetRequestAddress(intptr_t request_address)
 97 | {
 98 |   std::lock_guard<std::mutex> guard(request_address_mutex_);
 99 |   request_address_ = request_address;
100 | }
101 | // Installs the function SafeCancelRequest() calls; no lock is taken here.
102 | void
103 | InferPayload::SetRequestCancellationFunc(
104 |     const std::function<void(intptr_t)>& request_cancel_func)
105 | {
106 |   request_cancel_func_ = request_cancel_func;
107 | }
108 | // Cancels the stored request when an address and a function are both set.
109 | void
110 | InferPayload::SafeCancelRequest()
111 | {
112 |   std::lock_guard<std::mutex> guard(request_address_mutex_);
113 | 
114 |   // Act only when an address was recorded and a function was installed;
115 |   // the function runs while request_address_mutex_ is still held.
116 |   const bool has_request = (request_address_ != 0L);
117 |   if (has_request && request_cancel_func_) {
118 |     request_cancel_func_(request_address_);
119 |   }
120 | }
121 | 
122 | }}}  // namespace triton::backend::python
123 | 


--------------------------------------------------------------------------------
/src/infer_payload.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <functional>
30 | #include <queue>
31 | 
32 | #include "infer_response.h"
33 | #include "pb_preferred_memory.h"
34 | 
35 | namespace triton { namespace backend { namespace python {
36 | // Pairs an opaque shm_pool pointer with a PreferredMemory value.
37 | struct ResponseAllocatorUserp {
38 |   ResponseAllocatorUserp(
39 |       void* shm_pool, const PreferredMemory& preferred_memory)
40 |       : shm_pool(shm_pool), preferred_memory(preferred_memory)
41 |   {
42 |   }
43 |   void* shm_pool;
44 |   PreferredMemory preferred_memory;
45 | };
46 | 
47 | class InferPayload : public std::enable_shared_from_this<InferPayload> {
48 |  public:
49 |   InferPayload(
50 |       const bool is_decoupled,
51 |       std::function<void(std::unique_ptr<InferResponse>)> callback);
52 | 
53 |   /// GetPtr should only be called when the InferPayload object is owned by
54 |   /// a shared pointer. Calling this function in any other circumstance
55 |   /// is undefined behaviour before C++17 and throws std::bad_weak_ptr since.
56 |   std::shared_ptr<InferPayload> GetPtr() { return shared_from_this(); }
57 |   void SetValue(std::unique_ptr<InferResponse> infer_response);
58 |   void SetFuture(std::future<std::unique_ptr<InferResponse>>& response_future);
59 |   bool IsDecoupled();
60 |   bool IsPromiseSet();
61 |   void Callback(std::unique_ptr<InferResponse> infer_response);
62 |   void SetResponseAllocUserp(
63 |       const ResponseAllocatorUserp& response_alloc_userp);
64 |   std::shared_ptr<ResponseAllocatorUserp> ResponseAllocUserp();
65 |   void SetRequestAddress(intptr_t request_address);
66 |   void SetRequestCancellationFunc(
67 |       const std::function<void(intptr_t)>& request_cancel_func);
68 |   void SafeCancelRequest();
69 | 
70 |  private:
71 |   std::unique_ptr<std::promise<std::unique_ptr<InferResponse>>> promise_;
72 |   bool is_decoupled_;
73 |   std::mutex mutex_;  // Held by SetValue() while updating is_promise_set_.
74 |   bool is_promise_set_;
75 |   std::function<void(std::unique_ptr<InferResponse>)> callback_;
76 |   std::shared_ptr<ResponseAllocatorUserp> response_alloc_userp_;
77 |   std::mutex request_address_mutex_;  // Guards request_address_.
78 |   intptr_t request_address_;
79 |   std::function<void(intptr_t)> request_cancel_func_;
80 | };
81 | 
82 | }}}  // namespace triton::backend::python
83 | 


--------------------------------------------------------------------------------
/src/infer_trace.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "infer_trace.h"
 28 | 
 29 | namespace triton { namespace backend { namespace python {
 30 | // Copies the trace pointer and context; no shared-memory member is copied.
 31 | InferenceTrace::InferenceTrace(const InferenceTrace& rhs)
 32 |     : triton_trace_(rhs.triton_trace_),
 33 |       trace_context_(rhs.trace_context_)
 34 | {
 35 | }
 36 | // Assigns the trace pointer and context; other members are left as they are.
 37 | InferenceTrace&
 38 | InferenceTrace::operator=(const InferenceTrace& rhs)
 39 | {
 40 |   trace_context_ = rhs.trace_context_;
 41 |   triton_trace_ = rhs.triton_trace_;
 42 |   return *this;
 43 | }
 44 | // Copies the trace pointer and context out of `trace_shm` (must be non-null).
 45 | InferenceTrace::InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm)
 46 |     : triton_trace_(trace_shm->triton_trace_),
 47 |       trace_context_(trace_shm->trace_context_)
 48 | {
 49 | }
 50 | // Serializes this trace into shared memory and retains the allocations.
 51 | void
 52 | InferenceTrace::SaveToSharedMemory(
 53 |     std::unique_ptr<SharedMemoryManager>& shm_pool)
 54 | {
 55 |   // Allocate first, so that a throw from PbString::Create leaves every
 56 |   // member of this object untouched.
 57 |   AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
 58 |       shm_pool->Construct<InferenceTraceShm>();
 59 |   std::unique_ptr<PbString> trace_context_shm =
 60 |       PbString::Create(shm_pool, trace_context_);
 61 | 
 62 |   InferenceTraceShm* shm_record = infer_trace_shm.data_.get();
 63 |   shm_record->triton_trace = triton_trace_;
 64 |   shm_record->trace_context_shm_handle = trace_context_shm->ShmHandle();
 65 | 
 66 |   // Commit: retain the allocations and expose the record and its handle.
 67 |   trace_context_shm_ = std::move(trace_context_shm);
 68 |   infer_trace_shm_ = std::move(infer_trace_shm);
 69 |   infer_trace_shm_ptr_ = shm_record;
 70 |   shm_handle_ = infer_trace_shm_.handle_;
 71 | }
 72 | // Rebuilds an InferenceTrace from the record stored at `handle`.
 73 | std::unique_ptr<InferenceTrace>
 74 | InferenceTrace::LoadFromSharedMemory(
 75 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
 76 |     bi::managed_external_buffer::handle_t handle)
 77 | {
 78 |   AllocatedSharedMemory<InferenceTraceShm> record =
 79 |       shm_pool->Load<InferenceTraceShm>(handle);
 80 | 
 81 |   // The record carries the handle of the context string; load that too.
 82 |   std::unique_ptr<PbString> context_payload = PbString::LoadFromSharedMemory(
 83 |       shm_pool, record.data_->trace_context_shm_handle);
 84 |   std::unique_ptr<InferenceTrace> loaded(
 85 |       new InferenceTrace(record, context_payload));
 86 |   return loaded;
 87 | }
 88 | // Takes ownership of loaded shared memory and mirrors its values locally.
 89 | InferenceTrace::InferenceTrace(
 90 |     AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
 91 |     std::unique_ptr<PbString>& trace_context_shm)
 92 |     : infer_trace_shm_(std::move(infer_trace_shm)),
 93 |       trace_context_shm_(std::move(trace_context_shm))
 94 | {
 95 |   shm_handle_ = infer_trace_shm_.handle_;
 96 |   infer_trace_shm_ptr_ = infer_trace_shm_.data_.get();
 97 |   trace_context_ = trace_context_shm_->String();
 98 |   triton_trace_ = infer_trace_shm_ptr_->triton_trace;
 99 | }
100 | 
101 | }}};  // namespace triton::backend::python
102 | 


--------------------------------------------------------------------------------
/src/infer_trace.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <string>
30 | 
31 | #include "pb_string.h"
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | // Shared-memory record filled in by InferenceTrace::SaveToSharedMemory().
36 | struct InferenceTraceShm {
37 |   bi::managed_external_buffer::handle_t trace_context_shm_handle;
38 |   // The address of the 'TRITONSERVER_InferTrace' object.
39 |   void* triton_trace;
40 | };
41 | 
42 | //
43 | // Inference Trace: a trace object pointer plus its context string.
44 | //
45 | class InferenceTrace {
46 |  public:
47 |   InferenceTrace(void* triton_trace, const std::string& ctxt)
48 |       : triton_trace_(triton_trace), trace_context_(ctxt)
49 |   {
50 |   }
51 |   InferenceTrace() : triton_trace_(nullptr), trace_context_("") {}
52 |   InferenceTrace(const InferenceTrace& rhs);
53 |   InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm);
54 |   InferenceTrace& operator=(const InferenceTrace& rhs);
55 |   /// Save InferenceTrace object to shared memory.
56 |   /// \param shm_pool Shared memory pool to save the InferenceTrace object.
57 |   void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
58 | 
59 |   /// Create an InferenceTrace object from shared memory.
60 |   /// \param shm_pool Shared memory pool
61 |   /// \param handle Shared memory handle of the InferenceTrace.
62 |   /// \return Returns the InferenceTrace stored at the specified handle
63 |   /// location.
64 |   static std::unique_ptr<InferenceTrace> LoadFromSharedMemory(
65 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
66 |       bi::managed_external_buffer::handle_t handle);
67 | 
68 |   void* TritonTrace() { return triton_trace_; }
69 |   const std::string& Context() const { return trace_context_; }
70 |   // Handle of the shared-memory record; 0 while no record is attached.
71 |   bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
72 | 
73 |  private:
74 |   // The private constructor for creating an InferenceTrace object from shared
75 |   // memory.
76 |   InferenceTrace(
77 |       AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
78 |       std::unique_ptr<PbString>& trace_context_shm);
79 | 
80 |   void* triton_trace_;
81 |   std::string trace_context_;
82 | 
83 |   // Shared memory state; the pointer and handle default to null / 0.
84 |   AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm_;
85 |   InferenceTraceShm* infer_trace_shm_ptr_ = nullptr;
86 |   bi::managed_external_buffer::handle_t shm_handle_ = 0;
87 |   std::unique_ptr<PbString> trace_context_shm_;
88 | };
89 | 
90 | }}};  // namespace triton::backend::python
91 | 


--------------------------------------------------------------------------------
/src/libtriton_python.ldscript:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | {
27 |   global:  # Symbols that stay visible outside the shared library.
28 |     TRITONBACKEND_*;
29 |   local: *;  # Every other symbol is kept local.
30 | };
31 | 


--------------------------------------------------------------------------------
/src/memory_manager.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "memory_manager.h"
 28 | 
 29 | #include "pb_utils.h"
 30 | 
 31 | 
 32 | namespace triton { namespace backend { namespace python {
 33 | 
 34 | 
 35 | #ifdef TRITON_ENABLE_GPU
 36 | BackendMemoryRecord::BackendMemoryRecord(
 37 |     std::unique_ptr<BackendMemory> backend_memory)
 38 |     : backend_memory_(std::move(backend_memory))
 39 | {
 40 |   release_callback_ = [](void* ptr) {
 41 |     // Do nothing. The backend_memory_ will be destroyed in the destructor.
 42 |   };
 43 | }
 44 | // Returns the wrapped buffer's address, which serves as the record id.
 45 | void*
 46 | BackendMemoryRecord::MemoryId()
 47 | {
 48 |   return reinterpret_cast<void*>(backend_memory_->MemoryPtr());
 49 | }
 50 | // Returns the release callback that the constructor stored (a no-op).
 51 | const std::function<void(void*)>&
 52 | BackendMemoryRecord::ReleaseCallback()
 53 | {
 54 |   return release_callback_;
 55 | }
 56 | #endif
 57 | // Takes ownership of the queue, then starts the thread that pops from it.
 58 | MemoryManager::MemoryManager(
 59 |     std::unique_ptr<MessageQueue<intptr_t>>&& memory_message_queue)
 60 | {
 61 |   message_queue_ = std::move(memory_message_queue);  // Before thread start.
 62 |   thread_ = std::thread(&MemoryManager::QueueMonitorThread, this);
 63 | }
 64 | // Registers 'memory_record' under the id derived from its MemoryId().
 65 | intptr_t
 66 | MemoryManager::AddRecord(std::unique_ptr<MemoryRecord>&& memory_record)
 67 | {
 68 |   // Hold the lock for the whole call; MemoryId() is read under it as well.
 69 |   const std::lock_guard<std::mutex> guard(mu_);
 70 | 
 71 |   const auto record_id = reinterpret_cast<intptr_t>(memory_record->MemoryId());
 72 |   records_.emplace(record_id, std::move(memory_record));
 73 | 
 74 |   return record_id;
 75 | }
 76 | // Background loop: pops ids from the queue and releases matching records.
 77 | void
 78 | MemoryManager::QueueMonitorThread()
 79 | {
 80 |   while (true) {
 81 |     intptr_t memory = message_queue_->Pop();
 82 |     if (memory == 0) {  // Zero ends the loop (presumably DUMMY_MESSAGE).
 83 |       return;
 84 |     }
 85 | 
 86 |     {
 87 |       std::lock_guard<std::mutex> lock{mu_};
 88 |       auto it = records_.find(memory);
 89 |       if (it == records_.end()) {
 90 |         LOG_MESSAGE(
 91 |             TRITONSERVER_LOG_ERROR,
 92 |             "Unexpected memory index received for deallocation.");
 93 |         continue;
 94 |       }
 95 | 
 96 |       // Call the release callback.
 97 |       it->second->ReleaseCallback()(it->second->MemoryId());
 98 |       // Erasing the map entry also destroys the owned MemoryRecord.
 99 |       records_.erase(it);
100 |     }
101 |   }
102 | }
103 | // Asks the monitor thread to stop and waits until it has exited.
104 | MemoryManager::~MemoryManager()
105 | {
106 |   // Push a dummy message that will trigger the destruction of the background
107 |   // thread.
108 |   message_queue_->Push(DUMMY_MESSAGE);
109 |   thread_.join();
110 | }
111 | 
112 | }}};  // namespace triton::backend::python
113 | 


--------------------------------------------------------------------------------
/src/memory_manager.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <functional>
30 | #include <mutex>
31 | #include <thread>
32 | #include <unordered_map>
33 | 
34 | #include "message_queue.h"
35 | #include "triton/backend/backend_common.h"
36 | #include "triton/backend/backend_memory.h"
37 | #include "triton/core/tritonserver.h"
38 | 
39 | #ifdef TRITON_ENABLE_GPU
40 | #include <cuda_runtime_api.h>
41 | #endif  // TRITON_ENABLE_GPU
42 | 
43 | 
44 | namespace triton { namespace backend { namespace python {
45 | // Interface of a tracked allocation: an id plus a release callback.
46 | class MemoryRecord {
47 |  public:
48 |   virtual const std::function<void(void*)>& ReleaseCallback() = 0;
49 |   virtual void* MemoryId() = 0;
50 |   virtual ~MemoryRecord() = default;
51 | };
52 | 
53 | #ifdef TRITON_ENABLE_GPU
54 | class BackendMemoryRecord : public MemoryRecord {  // Owns a BackendMemory.
55 |  public:
56 |   BackendMemoryRecord(std::unique_ptr<BackendMemory> backend_memory);
57 |   const std::function<void(void*)>& ReleaseCallback() override;
58 |   void* MemoryId() override;
59 |   ~BackendMemoryRecord() { backend_memory_.reset(); }  // Drops the buffer.
60 | 
61 |  private:
62 |   std::unique_ptr<BackendMemory> backend_memory_;
63 |   std::function<void(void*)> release_callback_;
64 | };
65 | #endif
66 | 
67 | /// Memory manager class is used primarily for managing the lifetime of GPU
68 | /// tensors in BLS. It mainly consists of a background thread that monitors a
69 | /// message queue in shared memory. Whenever a GPU tensor is created, it will
70 | /// be pushed to the memory manager. The stub process must send a message to the
71 | /// message queue asking the memory manager to deallocate the GPU tensor.
72 | class MemoryManager {
73 |  public:
74 |   MemoryManager(std::unique_ptr<MessageQueue<intptr_t>>&& memory_message_queue);
75 |   intptr_t AddRecord(std::unique_ptr<MemoryRecord>&& memory_record);
76 |   TRITONSERVER_Error* ResetCounter();  // NOTE(review): no definition in .cc.
77 |   ~MemoryManager();
78 | 
79 |  private:
80 |   std::thread thread_;  // Runs QueueMonitorThread().
81 |   std::unordered_map<intptr_t, std::unique_ptr<MemoryRecord>> records_;
82 |   std::unique_ptr<MessageQueue<intptr_t>> message_queue_;
83 |   void QueueMonitorThread();
84 |   std::mutex mu_;  // Guards records_.
85 | };
86 | }}};  // namespace triton::backend::python
87 | 


--------------------------------------------------------------------------------
/src/pb_bls_cancel.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include "pb_bls_cancel.h"
28 | 
29 | #include "pb_stub.h"
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | // Allocates the cancel message in 'shm_pool' and fills in its fields.
33 | void
34 | PbBLSCancel::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
35 | {
36 |   cancel_shm_ = shm_pool->Construct<CancelBLSRequestMessage>();
37 |   new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex;  // Built in place.
38 |   new (&(cancel_shm_.data_->cv)) bi::interprocess_condition;
39 |   cancel_shm_.data_->waiting_on_stub = false;
40 |   cancel_shm_.data_->infer_payload_id = infer_playload_id_;
41 |   cancel_shm_.data_->is_cancelled = is_cancelled_;
42 | }
43 | // Returns the handle stored in the cancel message's shared-memory wrapper.
44 | bi::managed_external_buffer::handle_t
45 | PbBLSCancel::ShmHandle()
46 | {
47 |   return cancel_shm_.handle_;
48 | }
49 | // Returns a raw, non-owning pointer to the cancel message.
50 | CancelBLSRequestMessage*
51 | PbBLSCancel::ShmPayload()
52 | {
53 |   return cancel_shm_.data_.get();
54 | }
55 | // Enqueues a cancel request and blocks until ReportIsCancelled() is called.
56 | void
57 | PbBLSCancel::Cancel()
58 | {
59 |   // Release the GIL. Python objects are not accessed during the check.
60 |   py::gil_scoped_release gil_release;
61 | 
62 |   std::unique_lock<std::mutex> lk(mu_);
63 |   // The cancelled flag can only move from false to true, not the other way, so
64 |   // once it is set there is nothing left to request.
65 |   if (is_cancelled_) {
66 |     return;
67 |   }
68 |   if (!updating_) {  // No request in flight yet: this caller enqueues one.
69 |     std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
70 |     if (!stub->StubToParentServiceActive()) {
71 |       LOG_ERROR << "Cannot communicate with parent service";
72 |       return;
73 |     }
74 |     // 'updating_' stays true until ReportIsCancelled() clears it.
75 |     stub->EnqueueCancelBLSRequest(this);
76 |     updating_ = true;
77 |   }
78 |   cv_.wait(lk, [this] { return !updating_; });
79 | }
80 | // Stores the reported state and wakes every thread blocked in Cancel().
81 | void
82 | PbBLSCancel::ReportIsCancelled(bool is_cancelled)
83 | {
84 |   std::unique_lock<std::mutex> guard(mu_);
85 |   updating_ = false;
86 |   is_cancelled_ = is_cancelled;
87 |   guard.unlock();
88 |   // Notify only after releasing the mutex so woken waiters can take it.
89 |   cv_.notify_all();
90 | }
91 | 
92 | }}}  // namespace triton::backend::python
93 | 


--------------------------------------------------------------------------------
/src/pb_bls_cancel.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <condition_variable>
30 | #include <mutex>
31 | 
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | 
36 | class PbBLSCancel {  // State for cancelling one BLS inference request.
37 |  public:
38 |   PbBLSCancel(void* infer_playload_id)
39 |       : updating_(false), infer_playload_id_(infer_playload_id),
40 |         is_cancelled_(false)
41 |   {
42 |   }
43 |   DISALLOW_COPY_AND_ASSIGN(PbBLSCancel);
44 | 
45 |   void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
46 |   bi::managed_external_buffer::handle_t ShmHandle();
47 |   CancelBLSRequestMessage* ShmPayload();
48 | 
49 |   void Cancel();
50 |   void ReportIsCancelled(bool is_cancelled);
51 | 
52 |  private:
53 |   AllocatedSharedMemory<CancelBLSRequestMessage> cancel_shm_;
54 | 
55 |   std::mutex mu_;  // Guards updating_ and is_cancelled_.
56 |   std::condition_variable cv_;  // Notified by ReportIsCancelled().
57 |   bool updating_;  // True while a cancel request is in flight.
58 |   // NOTE(review): 'playload' looks like a typo of 'payload'.
59 |   void* infer_playload_id_;
60 |   bool is_cancelled_;
61 | };
62 | 
63 | }}};  // namespace triton::backend::python
64 | 


--------------------------------------------------------------------------------
/src/pb_cancel.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include "pb_cancel.h"
28 | 
29 | #include "pb_stub.h"
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | // Allocates the is-cancelled message in 'shm_pool' and fills in its fields.
33 | void
34 | PbCancel::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
35 | {
36 |   cancel_shm_ = shm_pool->Construct<IsCancelledMessage>();
37 |   new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex;  // Built in place.
38 |   new (&(cancel_shm_.data_->cv)) bi::interprocess_condition;
39 |   cancel_shm_.data_->waiting_on_stub = false;
40 |   cancel_shm_.data_->response_factory_address = response_factory_address_;
41 |   cancel_shm_.data_->request_address = request_address_;
42 |   cancel_shm_.data_->is_cancelled = is_cancelled_;
43 | }
44 | // Returns the handle stored in the message's shared-memory wrapper.
45 | bi::managed_external_buffer::handle_t
46 | PbCancel::ShmHandle()
47 | {
48 |   return cancel_shm_.handle_;
49 | }
50 | // Returns a raw, non-owning pointer to the is-cancelled message.
51 | IsCancelledMessage*
52 | PbCancel::ShmPayload()
53 | {
54 |   return cancel_shm_.data_.get();
55 | }
56 | // Returns the cancelled flag, refreshing it through the stub while false.
57 | bool
58 | PbCancel::IsCancelled()
59 | {
60 |   // Release the GIL. Python objects are not accessed during the check.
61 |   py::gil_scoped_release gil_release;
62 | 
63 |   std::unique_lock<std::mutex> lk(mu_);
64 |   // The cancelled flag can only move from false to true, not the other way, so
65 |   // it is checked on each query until cancelled and then implicitly cached.
66 |   if (is_cancelled_) {
67 |     return is_cancelled_;
68 |   }
69 |   if (!updating_) {  // No query in flight yet: this caller enqueues one.
70 |     std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
71 |     if (!stub->StubToParentServiceActive()) {
72 |       LOG_ERROR << "Cannot communicate with parent service";
73 |       return false;  // Reported as not cancelled when the parent is down.
74 |     }
75 |     stub->EnqueueIsCancelled(this);
76 |     updating_ = true;  // Cleared again by ReportIsCancelled().
77 |   }
78 |   cv_.wait(lk, [this] { return !updating_; });
79 |   return is_cancelled_;
80 | }
81 | // Stores the reported state and wakes every thread blocked in IsCancelled().
82 | void
83 | PbCancel::ReportIsCancelled(bool is_cancelled)
84 | {
85 |   std::unique_lock<std::mutex> guard(mu_);
86 |   updating_ = false;
87 |   is_cancelled_ = is_cancelled;
88 |   guard.unlock();
89 |   // Notify only after releasing the mutex so woken waiters can take it.
90 |   cv_.notify_all();
91 | }
92 | 
93 | }}}  // namespace triton::backend::python
94 | 


--------------------------------------------------------------------------------
/src/pb_cancel.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <condition_variable>
30 | #include <mutex>
31 | 
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | 
36 | class PbCancel {  // Cancellation state of one request, queried via the stub.
37 |  public:
38 |   PbCancel(intptr_t response_factory_address, intptr_t request_address)
39 |       : updating_(false), response_factory_address_(response_factory_address),
40 |         request_address_(request_address), is_cancelled_(false)
41 |   {
42 |   }
43 |   DISALLOW_COPY_AND_ASSIGN(PbCancel);
44 | 
45 |   void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
46 |   bi::managed_external_buffer::handle_t ShmHandle();
47 |   IsCancelledMessage* ShmPayload();
48 | 
49 |   bool IsCancelled();
50 |   void ReportIsCancelled(bool is_cancelled);
51 | 
52 |  private:
53 |   AllocatedSharedMemory<IsCancelledMessage> cancel_shm_;
54 | 
55 |   std::mutex mu_;  // Guards updating_ and is_cancelled_.
56 |   std::condition_variable cv_;  // Notified by ReportIsCancelled().
57 |   bool updating_;  // True while an is-cancelled query is in flight.
58 | 
59 |   intptr_t response_factory_address_;
60 |   intptr_t request_address_;
61 |   bool is_cancelled_;
62 | };
63 | 
64 | }}};  // namespace triton::backend::python
65 | 


--------------------------------------------------------------------------------
/src/pb_env.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | #include <climits>
29 | #include <map>
30 | #include <mutex>
31 | #include <string>
32 | 
33 | #ifdef WIN32
34 | #include <windows.h>
35 | #undef PATH_MAX
36 | #define PATH_MAX MAX_PATH
37 | #endif
38 | namespace triton { namespace backend { namespace python {
39 | // Free helpers; only declared here, their definitions are not in this header.
40 | void ExtractTarFile(std::string& archive_path, std::string& dst_path);
41 | 
42 | bool FileExists(std::string& path);
43 | 
44 | //
45 | // A class that manages Python environments
46 | //
47 | #ifndef _WIN32
48 | class EnvironmentManager {
49 |   std::map<std::string, std::pair<std::string, time_t>> env_map_;
50 |   char base_path_[PATH_MAX + 1];  // PATH_MAX chars plus one extra byte.
51 |   std::mutex mutex_;  // NOTE(review): presumably guards env_map_ — confirm.
52 | 
53 |  public:
54 |   EnvironmentManager();
55 | 
56 |   // Extracts the tar.gz file in the 'env_path' if it has not been
57 |   // already extracted.
58 |   std::string ExtractIfNotExtracted(std::string env_path);
59 |   ~EnvironmentManager();
60 | };
61 | #endif
62 | 
63 | }}}  // namespace triton::backend::python
64 | 


--------------------------------------------------------------------------------
/src/pb_error.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include "pb_error.h"
28 | 
29 | namespace triton { namespace backend { namespace python {
30 | // Returns the stored Triton error code.
31 | TRITONSERVER_Error_Code
32 | PbError::Code()
33 | {
34 |   return code_;
35 | }
36 | // Returns a reference to the stored error message.
37 | const std::string&
38 | PbError::Message()
39 | {
40 |   return message_;
41 | }
42 | // Returns the handle recorded by SaveToSharedMemory().
43 | bi::managed_external_buffer::handle_t
44 | PbError::ShmHandle()
45 | {
46 |   return shm_handle_;
47 | }
48 | // Writes the message string and a PbErrorShm header into 'shm_pool'.
49 | void
50 | PbError::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
51 | {
52 |   message_shm_ = PbString::Create(shm_pool, message_);
53 |   error_shm_ = shm_pool->Construct<PbErrorShm>();
54 |   shm_handle_ = error_shm_.handle_;
55 |   error_shm_.data_->message_shm_handle = message_shm_->ShmHandle();
56 |   error_shm_.data_->code = code_;
57 | }
58 | // Rebuilds a PbError from the PbErrorShm block stored at 'shm_handle'.
59 | std::shared_ptr<PbError>
60 | PbError::LoadFromSharedMemory(
61 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
62 |     bi::managed_external_buffer::handle_t shm_handle)
63 | {
64 |   AllocatedSharedMemory<PbErrorShm> error_shm =
65 |       shm_pool->Load<PbErrorShm>(shm_handle);
66 |   std::unique_ptr<PbString> message_shm = PbString::LoadFromSharedMemory(
67 |       shm_pool, error_shm.data_->message_shm_handle);
68 |   // Copy the fields out before the shared-memory objects are moved from.
69 |   TRITONSERVER_Error_Code code = error_shm.data_->code;
70 |   std::string message = message_shm->String();
71 | 
72 |   return std::shared_ptr<PbError>(new PbError(
73 |       std::move(message_shm), std::move(error_shm), code, std::move(message)));
74 | }
75 | // Constructor used by LoadFromSharedMemory(); takes over every argument.
76 | PbError::PbError(
77 |     std::shared_ptr<PbString>&& message_shm,
78 |     AllocatedSharedMemory<PbErrorShm>&& error_shm, TRITONSERVER_Error_Code code,
79 |     std::string&& message)
80 |     : message_shm_(std::move(message_shm)), error_shm_(std::move(error_shm)),
81 |       code_(code), message_(std::move(message))
82 | {
83 | }
84 | 
85 | }}}  // namespace triton::backend::python
86 | 


--------------------------------------------------------------------------------
/src/pb_error.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <string>
30 | 
31 | #include "pb_string.h"
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | 
36 | struct PbErrorShm {  // Error record as it is laid out in shared memory.
37 |   TRITONSERVER_Error_Code code;  // Error code of the stored error.
38 |   bi::managed_external_buffer::handle_t message_shm_handle;  // PbString handle of the message.
39 | };
40 | 
41 | class PbError {
42 |  public:
43 |   PbError(
44 |       const std::string& message,
45 |       TRITONSERVER_Error_Code code = TRITONSERVER_ERROR_INTERNAL)
46 |       : code_(code), message_(message)
47 |   {
48 |   }
49 |   DISALLOW_COPY_AND_ASSIGN(PbError);
50 | 
51 |   TRITONSERVER_Error_Code Code();
52 |   const std::string& Message();
53 | 
54 |   void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
55 |   bi::managed_external_buffer::handle_t ShmHandle();
56 | 
57 |   static std::shared_ptr<PbError> LoadFromSharedMemory(
58 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
59 |       bi::managed_external_buffer::handle_t handle);
60 | 
61 |  private:
62 |   PbError(
63 |       std::shared_ptr<PbString>&& message_shm,
64 |       AllocatedSharedMemory<PbErrorShm>&& error_shm,
65 |       TRITONSERVER_Error_Code code, std::string&& message);
66 | 
67 |   std::shared_ptr<PbString> message_shm_;
68 |   AllocatedSharedMemory<PbErrorShm> error_shm_;
69 |   bi::managed_external_buffer::handle_t shm_handle_;
70 | 
71 |   TRITONSERVER_Error_Code code_;
72 |   std::string message_;
73 | };
74 | 
75 | }}};  // namespace triton::backend::python
76 | 


--------------------------------------------------------------------------------
/src/pb_exception.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <exception>
30 | #include <string>
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | 
33 | //
34 | // PythonBackendException
35 | //
36 | // Exception thrown if error occurs in PythonBackend.
37 | //
38 | struct PythonBackendException : std::exception {
39 |   PythonBackendException(const std::string& message) : message_(message) {}
40 |   // Returns message_ as a C string; valid while this exception is alive.
41 |   const char* what() const noexcept override { return message_.c_str(); }
42 |   // Message text returned by what().
43 |   std::string message_;
44 | };
45 | 
46 | }}}  // namespace triton::backend::python
47 | 


--------------------------------------------------------------------------------
/src/pb_log.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "pb_log.h"
 28 | 
 29 | namespace triton { namespace backend { namespace python {
 30 | 
 31 | PbLog::PbLog(
 32 |     const std::string& filename, uint32_t line, const std::string& message,
 33 |     LogLevel level)
 34 |     : filename_(filename), line_(line), message_(message), level_(level)
 35 | {
 36 | }
 37 | 
 38 | const std::string&
 39 | PbLog::Filename()
 40 | {
 41 |   return this->filename_;  // Reference to the member; no copy is made.
 42 | }
 43 | const std::string&
 44 | PbLog::Message()
 45 | {
 46 |   return this->message_;  // Reference to the member; no copy is made.
 47 | }
 48 | const LogLevel&
 49 | PbLog::Level()
 50 | {
 51 |   return this->level_;  // Returned by const reference to the member.
 52 | }
 53 | const uint32_t&
 54 | PbLog::Line()
 55 | {
 56 |   return this->line_;  // Returned by const reference to the member.
 57 | }
 58 | 
 59 | PbLogShm::PbLogShm(
 60 |     AllocatedSharedMemory<LogSendMessage>& log_container_shm,
 61 |     std::unique_ptr<PbString>& filename, std::unique_ptr<PbString>& message)
 62 |     : log_container_shm_(std::move(log_container_shm)),
 63 |       filename_pb_string_(std::move(filename)),
 64 |       message_pb_string_(std::move(message)),
 65 |       log_container_shm_ptr_(log_container_shm_.data_.get())
 66 | {  // All arguments are moved from; link both strings into the record.
 67 |   log_container_shm_ptr_->filename = filename_pb_string_->ShmHandle();
 68 |   log_container_shm_ptr_->log_message = message_pb_string_->ShmHandle();
 69 | }
 70 | 
 71 | std::unique_ptr<PbLogShm>
 72 | PbLogShm::Create(
 73 |     std::unique_ptr<SharedMemoryManager>& shm_pool, const std::string& filename,
 74 |     const uint32_t& line, const std::string& message, const LogLevel& level)
 75 | {
 76 |   // Both strings are placed in shared memory before the record itself.
 77 |   std::unique_ptr<PbString> filename_shm(PbString::Create(shm_pool, filename));
 78 |   std::unique_ptr<PbString> message_shm(PbString::Create(shm_pool, message));
 79 |   AllocatedSharedMemory<LogSendMessage> record_shm(
 80 |       shm_pool->Construct<LogSendMessage>());
 81 |   // Placement-new the interprocess mutex and condition inside the record.
 82 |   LogSendMessage& record = *record_shm.data_;
 83 |   new (&record.mu) bi::interprocess_mutex;
 84 |   new (&record.cv) bi::interprocess_condition;
 85 |   record.line = line;
 86 |   record.level = level;
 87 |   return std::unique_ptr<PbLogShm>(
 88 |       new PbLogShm(record_shm, filename_shm, message_shm));
 89 | }
 90 | 
 91 | std::unique_ptr<PbLog>
 92 | PbLogShm::LoadFromSharedMemory(
 93 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
 94 |     bi::managed_external_buffer::handle_t handle)
 95 | {
 96 |   // Map the record, then the two strings it references (filename first).
 97 |   AllocatedSharedMemory<LogSendMessage> record(
 98 |       shm_pool->Load<LogSendMessage>(handle));
 99 |   std::unique_ptr<PbString> filename_shm(
100 |       PbString::LoadFromSharedMemory(shm_pool, record.data_->filename));
101 |   std::unique_ptr<PbString> message_shm(
102 |       PbString::LoadFromSharedMemory(shm_pool, record.data_->log_message));
103 |   // PbLog stores its own copies of the strings, line and level.
104 |   return std::unique_ptr<PbLog>(new PbLog(
105 |       filename_shm->String(), record.data_->line, message_shm->String(),
106 |       record.data_->level));
107 | }
108 | 
109 | bi::managed_external_buffer::handle_t
110 | PbLogShm::ShmHandle()
111 | {
112 |   return this->log_container_shm_.handle_;  // Handle of the log record.
113 | }
114 | 
115 | LogSendMessage*
116 | PbLogShm::LogMessage()
117 | {
118 |   return this->log_container_shm_ptr_;  // Set once by the constructor.
119 | }
120 | 
121 | }}}  // namespace triton::backend::python
122 | 


--------------------------------------------------------------------------------
/src/pb_log.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <string>
30 | 
31 | #include "pb_string.h"
32 | #include "pb_utils.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | class PbLog {  // Process-local log record: filename, line, message, level.
36 |  public:
37 |   /// Create a PbLog that stores its own copies of the given fields
38 |   PbLog(
39 |       const std::string& filename, uint32_t line, const std::string& message,
40 |       LogLevel level);
41 | 
42 |   /// Get the filename stored in this log record
43 |   const std::string& Filename();
44 | 
45 |   /// Get the message text stored in this log record
46 |   const std::string& Message();
47 | 
48 |   /// Get the log level stored in this log record
49 |   const LogLevel& Level();
50 | 
51 |   /// Get the line number stored in this log record
52 |   const uint32_t& Line();
53 | 
54 |  private:
55 |   std::string filename_;
56 |   uint32_t line_;
57 |   std::string message_;
58 |   LogLevel level_;
59 | };
60 | 
61 | class PbLogShm {  // A log record placed in the shared memory pool.
62 |  public:
63 |   /// Place a log record (filename, line, message, level) in shared memory
64 |   static std::unique_ptr<PbLogShm> Create(
65 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
66 |       const std::string& filename, const uint32_t& line,
67 |       const std::string& message, const LogLevel& level);
68 | 
69 |   /// Load a PbLog object from the shared memory record identified by `handle`
70 |   static std::unique_ptr<PbLog> LoadFromSharedMemory(
71 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
72 |       bi::managed_external_buffer::handle_t handle);
73 | 
74 |   /// Get the shared memory handle of the saved log record
75 |   bi::managed_external_buffer::handle_t ShmHandle();
76 | 
77 |   /// Get a raw pointer to the saved log record
78 |   LogSendMessage* LogMessage();
79 | 
80 |  private:
81 |   AllocatedSharedMemory<LogSendMessage> log_container_shm_;  // The log record.
82 |   std::unique_ptr<PbString> filename_pb_string_;  // Filename string in shm.
83 |   std::unique_ptr<PbString> message_pb_string_;  // Message string in shm.
84 | 
85 |   LogSendMessage* log_container_shm_ptr_;  // log_container_shm_.data_.get()
86 |   /// Private constructor used by Create(); it moves from all three arguments
87 |   PbLogShm(
88 |       AllocatedSharedMemory<LogSendMessage>& log_container_shm,
89 |       std::unique_ptr<PbString>& filename, std::unique_ptr<PbString>& message);
90 | };
91 | }}};  // namespace triton::backend::python
92 | 


--------------------------------------------------------------------------------
/src/pb_map.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "pb_map.h"
 28 | 
 29 | namespace triton { namespace backend { namespace python {
 30 | 
 31 | std::unique_ptr<PbMap>
 32 | PbMap::Create(
 33 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
 34 |     std::unordered_map<std::string, std::string>& map)
 35 | {
 36 |   // Header record first, then an array with one PairShm slot per map entry.
 37 |   AllocatedSharedMemory<DictShm> header(shm_pool->Construct<DictShm>());
 38 |   header.data_->length = map.size();
 39 |   AllocatedSharedMemory<PairShm> slots(
 40 |       shm_pool->Construct<PairShm>(map.size()));
 41 |   header.data_->values = slots.handle_;
 42 | 
 43 |   // Every key and value gets its own PbString; `owners` collects them.
 44 |   std::vector<std::unique_ptr<PbString>> owners;
 45 |   PairShm* slot = slots.data_.get();
 46 |   for (auto& entry : map) {
 47 |     auto key_shm = PbString::Create(shm_pool, entry.first);
 48 |     auto value_shm = PbString::Create(shm_pool, entry.second);
 49 |     slot->key = key_shm->ShmHandle();
 50 |     slot->value = value_shm->ShmHandle();
 51 |     ++slot;
 52 |     owners.emplace_back(std::move(key_shm));
 53 |     owners.emplace_back(std::move(value_shm));
 54 |   }
 55 | 
 56 |   // Note: the PbMap constructor moves from `map` (the caller's object).
 57 |   return std::unique_ptr<PbMap>(new PbMap(owners, header, slots, map));
 58 | }
 59 | 
 60 | const std::unordered_map<std::string, std::string>&
 61 | PbMap::UnorderedMap()
 62 | {
 63 |   return this->map_;  // Process-local map, returned by const reference.
 64 | }
 65 | 
 66 | bi::managed_external_buffer::handle_t
 67 | PbMap::ShmHandle()
 68 | {
 69 |   return this->dict_handle_;  // Assigned once in the constructor.
 70 | }
 71 | 
 72 | std::unique_ptr<PbMap>
 73 | PbMap::LoadFromSharedMemory(
 74 |     std::unique_ptr<SharedMemoryManager>& shm_pool,
 75 |     bi::managed_external_buffer::handle_t handle)
 76 | {
 77 |   // Map the header, then the PairShm array its `values` handle refers to.
 78 |   AllocatedSharedMemory<DictShm> header(shm_pool->Load<DictShm>(handle));
 79 |   AllocatedSharedMemory<PairShm> slots(
 80 |       shm_pool->Load<PairShm>(header.data_->values));
 81 | 
 82 |   // Each key/value string is mapped and copied into a process-local map.
 83 |   std::unordered_map<std::string, std::string> loaded;
 84 |   std::vector<std::unique_ptr<PbString>> owners;
 85 |   for (size_t idx = 0; idx < header.data_->length; ++idx) {
 86 |     const PairShm& slot = slots.data_.get()[idx];
 87 |     std::unique_ptr<PbString> key_shm(
 88 |         PbString::LoadFromSharedMemory(shm_pool, slot.key));
 89 |     std::unique_ptr<PbString> value_shm(
 90 |         PbString::LoadFromSharedMemory(shm_pool, slot.value));
 91 |     loaded.emplace(key_shm->String(), value_shm->String());
 92 |     owners.emplace_back(std::move(key_shm));
 93 |     owners.emplace_back(std::move(value_shm));
 94 |   }
 95 | 
 96 |   return std::unique_ptr<PbMap>(new PbMap(owners, header, slots, loaded));
 97 | }
 98 | 
 99 | PbMap::PbMap(
100 |     std::vector<std::unique_ptr<PbString>>& strings,
101 |     AllocatedSharedMemory<DictShm>& dict_shm,
102 |     AllocatedSharedMemory<PairShm>& pair_shms,
103 |     std::unordered_map<std::string, std::string>& map)
104 |     : strings_(std::move(strings)), dict_shm_(std::move(dict_shm)),
105 |       pair_shms_(std::move(pair_shms)), dict_handle_(dict_shm_.handle_),
106 |       map_(std::move(map))
107 | {  // All arguments are moved from; the handle is read from member dict_shm_.
108 | }
109 | 
110 | }}}  // namespace triton::backend::python
111 | 


--------------------------------------------------------------------------------
/src/pb_map.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <unordered_map>
30 | 
31 | #include "pb_string.h"
32 | #include "shm_manager.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | 
36 | struct PairShm {  // One map entry: handles of its key and value strings.
37 |   bi::managed_external_buffer::handle_t key;
38 |   bi::managed_external_buffer::handle_t value;
39 | };
40 | 
41 | struct DictShm {  // Header record of a map stored in shared memory.
42 |   uint32_t length;  // Number of PairShm entries.
43 |   // `values` is the handle of the array that holds `length` PairShm objects.
44 |   bi::managed_external_buffer::handle_t values;
45 | };
46 | 
47 | 
48 | class PbMap {  // String-to-string map mirrored in the shared memory pool.
49 |  public:
50 |   static std::unique_ptr<PbMap> Create(
51 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
52 |       std::unordered_map<std::string, std::string>& map);  // `map` is moved from.
53 |   static std::unique_ptr<PbMap> LoadFromSharedMemory(
54 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
55 |       bi::managed_external_buffer::handle_t handle);  // Handle of a DictShm.
56 |   const std::unordered_map<std::string, std::string>& UnorderedMap();
57 |   bi::managed_external_buffer::handle_t ShmHandle();  // Returns dict_handle_.
58 | 
59 |  private:
60 |   PbMap(
61 |       std::vector<std::unique_ptr<PbString>>& strings,
62 |       AllocatedSharedMemory<DictShm>& dict_shm,
63 |       AllocatedSharedMemory<PairShm>& pair_shms,
64 |       std::unordered_map<std::string, std::string>& map);  // Moves from all four.
65 | 
66 |   std::vector<std::unique_ptr<PbString>> strings_;  // Key and value strings.
67 |   AllocatedSharedMemory<DictShm> dict_shm_;  // Header record.
68 |   AllocatedSharedMemory<PairShm> pair_shms_;  // Array of key/value handles.
69 |   bi::managed_external_buffer::handle_t dict_handle_;  // Handle of the header.
70 |   std::unordered_map<std::string, std::string> map_;  // Process-local copy.
71 | };
72 | }}}  // namespace triton::backend::python
73 | 


--------------------------------------------------------------------------------
/src/pb_metric_reporter.cc:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | #include "pb_metric_reporter.h"
 28 | 
 29 | #include "triton/backend/backend_common.h"
 30 | 
 31 | namespace triton { namespace backend { namespace python {
 32 | 
 33 | PbMetricReporter::PbMetricReporter(
 34 |     TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
 35 |     const uint32_t request_count,
 36 |     std::shared_ptr<std::vector<TRITONBACKEND_Response*>> responses)
 37 |     : instance_(instance), requests_(requests), request_count_(request_count),
 38 |       responses_(responses), total_batch_size_(0), exec_start_ns_(0),
 39 |       compute_start_ns_(0), compute_end_ns_(0), exec_end_ns_(0),
 40 |       success_status_(true)
 41 | {
 42 | }
 43 | 
 44 | PbMetricReporter::~PbMetricReporter()
 45 | {
 46 |   // Destruction is the reporting point: one statistics record per request,
 47 |   // followed by at most one record for the whole batch.
 48 |   for (uint32_t r = 0; r < request_count_; ++r) {
 49 |     // Note that there could still be responses that have not yet been sent
 50 |     // but those cannot be captured in the statistics as they reflect only
 51 |     // the request object.
 52 |     //
 53 |     // With a responses vector, a request counts as successful when its slot
 54 |     // holds a non-null response; a slot beyond the end of the vector is
 55 |     // reported as a failure instead of being read out of bounds. Without a
 56 |     // vector, the flag set through SetSuccessStatus() applies to every
 57 |     // request.
 58 |     bool success = success_status_;
 59 |     if (responses_) {
 60 |       success = (r < responses_->size()) && ((*responses_)[r] != nullptr);
 61 |     }
 62 |     LOG_IF_ERROR(
 63 |         TRITONBACKEND_ModelInstanceReportStatistics(
 64 |             instance_, requests_[r], success, exec_start_ns_,
 65 |             compute_start_ns_, compute_end_ns_, exec_end_ns_),
 66 |         "failed reporting request statistics");
 67 |   }
 68 | 
 69 |   // Report the statistics of the entire batch. Nothing is reported while the
 70 |   // batch size is still 0, i.e. SetBatchStatistics() was never called with a
 71 |   // non-zero value.
 72 |   if (total_batch_size_ != 0) {
 73 |     LOG_IF_ERROR(
 74 |         TRITONBACKEND_ModelInstanceReportBatchStatistics(
 75 |             instance_, total_batch_size_, exec_start_ns_, compute_start_ns_,
 76 |             compute_end_ns_, exec_end_ns_),
 77 |         "failed reporting batch request statistics");
 78 |   }
 79 | }
 80 | 
 81 | void
 82 | PbMetricReporter::SetBatchStatistics(size_t total_batch_size)
 83 | {
 84 |   this->total_batch_size_ = total_batch_size;  // 0: no batch report is sent.
 85 | }
 86 | 
 87 | void
 88 | PbMetricReporter::SetExecStartNs(const uint64_t exec_start_ns)
 89 | {
 90 |   this->exec_start_ns_ = exec_start_ns;  // Reported by the destructor.
 91 | }
 92 | 
 93 | void
 94 | PbMetricReporter::SetComputeStartNs(const uint64_t compute_start_ns)
 95 | {
 96 |   this->compute_start_ns_ = compute_start_ns;  // Reported by the destructor.
 97 | }
 98 | 
 99 | void
100 | PbMetricReporter::SetComputeEndNs(const uint64_t compute_end_ns)
101 | {
102 |   this->compute_end_ns_ = compute_end_ns;  // Reported by the destructor.
103 | }
104 | 
105 | void
106 | PbMetricReporter::SetExecEndNs(const uint64_t exec_end_ns)
107 | {
108 |   this->exec_end_ns_ = exec_end_ns;  // Reported by the destructor.
109 | }
110 | 
111 | void
112 | PbMetricReporter::SetSuccessStatus(const bool success_status)
113 | {
114 |   this->success_status_ = success_status;  // Used when responses_ is null.
115 | }
116 | 
117 | }}}  // namespace triton::backend::python
118 | 


--------------------------------------------------------------------------------
/src/pb_metric_reporter.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <memory>
30 | #include <string>
31 | #include <vector>
32 | 
33 | #include "triton/core/tritonbackend.h"
34 | 
35 | namespace triton { namespace backend { namespace python {
36 | class PbMetricReporter {  // Reports the collected statistics when destroyed.
37 |   TRITONBACKEND_ModelInstance* instance_;  // Passed to the report calls.
38 |   TRITONBACKEND_Request** requests_;  // Indexed from 0 to request_count_ - 1.
39 |   uint32_t request_count_;
40 |   std::shared_ptr<std::vector<TRITONBACKEND_Response*>> responses_;  // May be null.
41 |   size_t total_batch_size_;  // 0: the destructor skips the batch statistics.
42 |   uint64_t exec_start_ns_;
43 |   uint64_t compute_start_ns_;
44 |   uint64_t compute_end_ns_;
45 |   uint64_t exec_end_ns_;
46 |   bool success_status_;  // Per-request status used when responses_ is null.
47 | 
48 |  public:
49 |   PbMetricReporter(
50 |       TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
51 |       const uint32_t request_count,
52 |       std::shared_ptr<std::vector<TRITONBACKEND_Response*>> responses);
53 |   ~PbMetricReporter();  // Reports per-request, then batch, statistics.
54 |   void SetBatchStatistics(size_t total_batch_size);
55 |   void SetExecStartNs(const uint64_t exec_start_ns);
56 |   void SetComputeStartNs(const uint64_t compute_start_ns);
57 |   void SetComputeEndNs(const uint64_t compute_end_ns);
58 |   void SetExecEndNs(const uint64_t exec_end_ns);
59 |   void SetSuccessStatus(const bool success_status);
60 | };
61 | }}};  // namespace triton::backend::python
62 | 


--------------------------------------------------------------------------------
/src/pb_preferred_memory.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | namespace triton { namespace backend { namespace python {
30 | 
31 | class PreferredMemory {  // small value type: a memory kind plus a device id
32 |  public:
33 |   enum MemoryType { kGPU, kCPU, kDefault };
34 |   // Default-constructed preference is kDefault memory on device 0.
35 |   PreferredMemory()
36 |       : preferred_memory_type_(MemoryType::kDefault), preferred_device_id_(0)
37 |   {
38 |   }
39 |   // Stores the given memory type and device id unchanged.
40 |   PreferredMemory(
41 |       const MemoryType& preferred_memory_type,
42 |       const int64_t& preferred_device_id)
43 |       : preferred_memory_type_(preferred_memory_type),
44 |         preferred_device_id_(preferred_device_id)
45 |   {
46 |   }
47 |   // Accessors are const so they can be called on const objects and const references.
48 |   MemoryType PreferredMemoryType() const { return preferred_memory_type_; }
49 |   // Returns the stored device id.
50 |   int64_t PreferredDeviceId() const { return preferred_device_id_; }
51 |   // NOTE(review): int64_t comes from <cstdint>, which this header does not include itself.
52 |  private:
53 |   MemoryType preferred_memory_type_;
54 |   int64_t preferred_device_id_;
55 | };
56 | 
57 | }}}  // namespace triton::backend::python
58 | 


--------------------------------------------------------------------------------
/src/pb_response_iterator.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <queue>
30 | 
31 | #include "infer_response.h"
32 | #include "pb_bls_cancel.h"
33 | 
34 | namespace triton { namespace backend { namespace python {
35 | 
36 | class ResponseIterator {  // declaration only; member functions are defined outside this header
37 |  public:
38 |   ResponseIterator(const std::shared_ptr<InferResponse>& response);
39 |   ~ResponseIterator();
40 |   // NOTE(review): the exact semantics of each call below live in the .cc -- verify before relying on them.
41 |   std::shared_ptr<InferResponse> Next();
42 |   void Iter();
43 |   void EnqueueResponse(std::shared_ptr<InferResponse> infer_response);
44 |   void* Id();  // presumably returns id_ -- TODO confirm
45 |   void Clear();
46 |   std::vector<std::shared_ptr<InferResponse>> GetExistingResponses();
47 |   void Cancel();
48 |   // NOTE(review): only <queue> is included directly; <mutex>, <condition_variable>, <vector> presumably arrive transitively.
49 |  private:
50 |   std::vector<std::shared_ptr<InferResponse>> responses_;
51 |   std::queue<std::shared_ptr<InferResponse>> response_buffer_;
52 |   std::mutex mu_;  // presumably guards the containers above together with cv_ -- TODO confirm
53 |   std::condition_variable cv_;
54 |   void* id_;
55 |   bool is_finished_;
56 |   bool is_cleared_;
57 |   size_t idx_;
58 |   std::shared_ptr<PbBLSCancel> pb_bls_cancel_;
59 | };
60 | 
61 | }}}  // namespace triton::backend::python
62 | 


--------------------------------------------------------------------------------
/src/pb_string.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include "shm_manager.h"
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | 
33 | struct StringShm {  // plain record describing a string; presumably stored in shared memory -- TODO confirm
34 |   bi::managed_external_buffer::handle_t data;  // handle, presumably to the character buffer -- TODO confirm
35 |   size_t length;  // read by PbString::String() as the number of characters to copy
36 | };
37 | 
38 | class PbString {  // string wrapper built through static factories; both constructors are private
39 |  public:
40 |   static std::unique_ptr<PbString> Create(
41 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
42 |       const std::string& string);
43 |   static std::unique_ptr<PbString> Create(
44 |       const std::string& string, char* data_shm,
45 |       bi::managed_external_buffer::handle_t handle);
46 |   static std::unique_ptr<PbString> LoadFromSharedMemory(
47 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
48 |       bi::managed_external_buffer::handle_t handle);
49 |   static std::unique_ptr<PbString> LoadFromSharedMemory(
50 |       bi::managed_external_buffer::handle_t handle, char* data_shm);
51 |   static std::size_t ShmStructSize(const std::string& string);
52 |   // Inline accessors: MutableString() exposes the raw pointer; String() copies the characters out.
53 |   char* MutableString() { return string_shm_ptr_; }
54 |   std::string String()
55 |   {
56 |     return std::string(  // iterator-range constructor over `length` chars; no terminator is required
57 |         string_shm_ptr_, string_shm_ptr_ + string_container_shm_ptr_->length);
58 |   }
59 |   bi::managed_external_buffer::handle_t ShmHandle();
60 |   std::size_t Size();
61 |   // State: an allocation object plus a raw pointer for the StringShm record and for the characters.
62 |  private:
63 |   AllocatedSharedMemory<StringShm> string_container_shm_;
64 |   StringShm* string_container_shm_ptr_;
65 |   // NOTE(review): the raw pointers presumably point into the allocations next to them -- confirm in the .cc.
66 |   AllocatedSharedMemory<char> string_shm_;
67 |   char* string_shm_ptr_;
68 | 
69 |   bi::managed_external_buffer::handle_t string_handle_;
70 |   // Constructor taking the two allocation objects by reference.
71 |   PbString(
72 |       AllocatedSharedMemory<StringShm>& string_container_shm,
73 |       AllocatedSharedMemory<char>& string_shm);
74 |   // Constructor taking raw pointers and a handle instead of allocation objects.
75 |   PbString(
76 |       StringShm* string_container_shm, char* string_shm,
77 |       bi::managed_external_buffer::handle_t handle);
78 | };
79 | 
80 | }}}  // namespace triton::backend::python
81 | 


--------------------------------------------------------------------------------
/src/pb_stub_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include <dlpack/dlpack.h>
28 | #include <pybind11/embed.h>
29 | #include <pybind11/numpy.h>
30 | #include <pybind11/stl.h>
31 | 
32 | #include "triton/core/tritonserver.h"
33 | 
34 | namespace py = pybind11;
35 | namespace triton { namespace backend { namespace python {
36 | 
37 | /// Convert numpy dtype to triton dtype.
38 | /// \param data_type numpy data type to be converted.
39 | /// \return equivalent triton dtype.
40 | TRITONSERVER_DataType numpy_to_triton_type(py::object data_type);
41 | 
42 | /// Convert triton dtype to numpy dtype.
43 | /// \param data_type triton dtype to be converted.
44 | /// \return equivalent numpy data type.
45 | py::object triton_to_numpy_type(TRITONSERVER_DataType data_type);
46 | 
47 | /// Convert triton dtype to dlpack dtype.
48 | /// \param data_type triton dtype to be converted.
49 | /// \return equivalent DLPack data type.
50 | DLDataType triton_to_dlpack_type(TRITONSERVER_DataType data_type);
51 | 
52 | /// Convert dlpack dtype to triton dtype.
53 | /// \param data_type DLPack data type to be converted.
54 | /// \return equivalent triton dtype.
55 | TRITONSERVER_DataType dlpack_to_triton_type(const DLDataType& data_type);
56 | 
57 | /// Convert triton dtype to pybind numpy dtype.
58 | /// \param data_type triton dtype to be converted.
59 | /// \return equivalent pybind numpy dtype.
60 | py::dtype triton_to_pybind_dtype(TRITONSERVER_DataType data_type);
61 | }}}  // namespace triton::backend::python
62 | 


--------------------------------------------------------------------------------
/src/request_executor.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <memory>
30 | 
31 | #include "infer_payload.h"
32 | #include "infer_request.h"
33 | #include "infer_response.h"
34 | 
35 | namespace triton { namespace backend { namespace python {
36 | 
37 | TRITONSERVER_Error* CreateTritonErrorFromException(  // declared here, defined elsewhere
38 |     const PythonBackendException& pb_exception);  // NOTE(review): ownership of the returned error is not visible here
39 | 
40 | struct RequestCompletionUserp {  // wraps a single shared InferPayload reference
41 |   std::shared_ptr<InferPayload> infer_payload;
42 |   RequestCompletionUserp(std::shared_ptr<InferPayload>& infer_payload)
43 |       : infer_payload{infer_payload} {}  // copies the caller's shared_ptr into the member
44 | };
45 | 
46 | class RequestExecutor {  // declaration only; fields before `public:` are private by class default
47 |   TRITONSERVER_ResponseAllocator* response_allocator_ = nullptr;
48 |   TRITONSERVER_Server* server_;
49 |   std::unique_ptr<SharedMemoryManager>& shm_pool_;  // reference member: the referenced unique_ptr must outlive this object
50 |   // NOTE(review): std::future is used below but only <memory> is included directly; <future> presumably arrives transitively.
51 |  public:
52 |   std::future<std::unique_ptr<InferResponse>> Infer(
53 |       std::shared_ptr<InferRequest>& infer_request,
54 |       std::shared_ptr<InferPayload>& infer_payload);
55 |   // Takes the shared-memory pool by reference and a raw server pointer.
56 |   RequestExecutor(
57 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
58 |       TRITONSERVER_Server* server);
59 |   // NOTE(review): presumably releases response_allocator_ -- confirm in the .cc.
60 |   ~RequestExecutor();
61 | };
62 | }}}  // namespace triton::backend::python
63 | 


--------------------------------------------------------------------------------
/src/response_sender.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | 
29 | #include <atomic>
30 | #include <mutex>
31 | 
32 | #include "infer_response.h"
33 | #include "pb_cancel.h"
34 | #include "shm_manager.h"
35 | 
36 | namespace triton { namespace backend { namespace python {
37 | 
38 | class ResponseSender {  // declaration only, apart from the inline ResponseFactory() accessor
39 |  public:
40 |   ResponseSender(
41 |       intptr_t request_address, intptr_t response_factory_address,
42 |       bool const* is_decoupled,
43 |       const std::set<std::string>& requested_output_names,
44 |       std::unique_ptr<SharedMemoryManager>& shm_pool,
45 |       const std::shared_ptr<PbCancel>& pb_cancel);
46 |   intptr_t ResponseFactory() { return response_factory_address_; }  // returns the stored address as-is
47 |   ~ResponseSender();
48 |   void Send(std::shared_ptr<InferResponse> response, const uint32_t flags);
49 |   bool IsCancelled();
50 |   void UpdateStateAndCounters(InferResponse* response, const uint32_t flags);
51 | 
52 |   // Can be useful for stopping the model from sending any more responses.
53 |   void Close();
54 |   bool IsClosed();
55 | 
56 |  private:
57 |   void DeleteResponseFactory();
58 |   // Addresses are carried as integers (intptr_t); what they point to is not visible from this header.
59 |   intptr_t request_address_;
60 |   intptr_t response_factory_address_;
61 |   bool const* is_decoupled_;  // pointer to a flag owned elsewhere
62 |   std::set<std::string> requested_output_names_;  // stored by value (the constructor takes a const reference)
63 |   std::unique_ptr<SharedMemoryManager>& shm_pool_;  // reference member: the referenced unique_ptr must outlive this object
64 |   std::shared_ptr<PbCancel> pb_cancel_;
65 |   // NOTE(review): mu_ presumably guards closed_ and number_of_response_sent_ -- confirm in the .cc.
66 |   std::mutex mu_;
67 |   bool closed_;
68 |   size_t number_of_response_sent_;
69 | 
70 |   std::atomic<bool> response_factory_deleted_;
71 | };
72 | }}}  // namespace triton::backend::python
73 | 


--------------------------------------------------------------------------------
/src/scoped_defer.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include "scoped_defer.h"
28 | 
29 | namespace triton { namespace backend { namespace python {
30 | ScopedDefer::ScopedDefer(std::function<void()> task)
31 |     : task_(task), done_(false)
32 | {
33 |   // Stores a copy of the task and marks it as not yet run.
34 | }
35 | 
36 | void
37 | ScopedDefer::Complete()
38 | {
39 |   // Run the deferred task now, at most once; later calls are no-ops.
40 |   if (done_) { return; }
41 |   task_();
42 |   done_ = true;
43 | }
44 | 
45 | ScopedDefer::~ScopedDefer()
46 | {
47 |   // Run the task on destruction unless Complete() has already run it.
48 |   const bool pending = !done_;
49 |   if (pending) { task_(); }
50 | }
51 | 
52 | }}};  // namespace triton::backend::python
53 | 


--------------------------------------------------------------------------------
/src/scoped_defer.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #pragma once
28 | #include <functional>
29 | 
30 | namespace triton { namespace backend { namespace python {
31 | class ScopedDefer {  // holds a callable to be run later; definitions live in scoped_defer.cc
32 |  public:
33 |   ScopedDefer(std::function<void()> task);  // not explicit: a callable converts implicitly
34 |   ~ScopedDefer();
35 |   void Complete();
36 |   // NOTE(review): copy operations are not deleted here, so a copy also holds the task -- confirm intent.
37 |  private:
38 |   std::function<void()> task_;  // the deferred callable
39 |   bool done_;  // flag declared after task_, so it is initialized second
40 | };
41 | 
42 | }}}  // namespace triton::backend::python
43 | 


--------------------------------------------------------------------------------
/src/shm_monitor/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without
 4 | # modification, are permitted provided that the following conditions
 5 | # are met:
 6 | #  * Redistributions of source code must retain the above copyright
 7 | #    notice, this list of conditions and the following disclaimer.
 8 | #  * Redistributions in binary form must reproduce the above copyright
 9 | #    notice, this list of conditions and the following disclaimer in the
10 | #    documentation and/or other materials provided with the distribution.
11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | #    contributors may be used to endorse or promote products derived
13 | #    from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | cmake_minimum_required (VERSION 3.18)
28 | # Python extension module built from the monitor source plus the shared-memory manager sources.
29 | pybind11_add_module(
30 |   triton-shm-monitor
31 |   EXCLUDE_FROM_ALL
32 |   ./shm_monitor.cc
33 |   ../shm_manager.h
34 |   ../shm_manager.cc
35 | )
36 | # Link librt privately (see the inline note on -lrt below).
37 | target_link_libraries(
38 |   triton-shm-monitor
39 |   PRIVATE
40 |     -lrt # shared memory
41 | )
42 | # The target name uses dashes; the built file is named triton_shm_monitor, the name passed to PYBIND11_MODULE.
43 | set_property(TARGET triton-shm-monitor PROPERTY OUTPUT_NAME triton_shm_monitor)
44 | # OPTIONAL: installation does not fail if the library was not built (the target is EXCLUDE_FROM_ALL).
45 | install(
46 |   TARGETS
47 |     triton-shm-monitor
48 |   LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/python OPTIONAL
49 | )
50 | 


--------------------------------------------------------------------------------
/src/shm_monitor/shm_monitor.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | //
 3 | // Redistribution and use in source and binary forms, with or without
 4 | // modification, are permitted provided that the following conditions
 5 | // are met:
 6 | //  * Redistributions of source code must retain the above copyright
 7 | //    notice, this list of conditions and the following disclaimer.
 8 | //  * Redistributions in binary form must reproduce the above copyright
 9 | //    notice, this list of conditions and the following disclaimer in the
10 | //    documentation and/or other materials provided with the distribution.
11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
12 | //    contributors may be used to endorse or promote products derived
13 | //    from this software without specific prior written permission.
14 | //
15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | #include <pybind11/pybind11.h>
28 | 
29 | #include "../shm_manager.h"
30 | 
31 | namespace triton { namespace backend { namespace python {
32 | namespace py = pybind11;
33 | 
34 | PYBIND11_MODULE(triton_shm_monitor, m)
35 | {
36 |   py::class_<SharedMemoryManager> shm_manager_cls(m, "SharedMemoryManager");
37 |   shm_manager_cls.def(py::init<const std::string&>());  // constructible from one string
38 |   shm_manager_cls.def("free_memory", &SharedMemoryManager::FreeMemory);
39 | }
40 | 
41 | }}}  // namespace triton::backend::python
42 | 


--------------------------------------------------------------------------------