├── .clang-format ├── .dockerignore ├── .gitattributes ├── .github └── workflows │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Example_Models.md ├── LICENSE ├── README.md ├── SKLearn_and_cuML.md ├── build.sh ├── build_conda_env_container.sh ├── ci ├── gitlab │ └── build.sh └── local │ └── build.sh ├── cmake ├── modules │ └── ConfigureCUDA.cmake └── thirdparty │ ├── get_cuml.cmake │ ├── get_gtest.cmake │ ├── get_rapids-triton.cmake │ └── get_treelite.cmake ├── conda └── environments │ ├── buildpy.yml │ ├── rapids_triton_dev.yml │ ├── triton_benchmark.yml │ ├── triton_test.yml │ └── triton_test_no_client.yml ├── docs ├── build.md ├── explainability.md ├── install.md ├── model_config.md ├── model_support.md ├── repo_overview.md ├── sklearn_and_cuml.md ├── tests.md └── workflow.md ├── notebooks ├── README.md ├── categorical-fraud-detection │ ├── Fraud_Detection_Example.ipynb │ ├── README.md │ └── environment.yml ├── faq │ ├── FAQs.ipynb │ ├── README.md │ └── environment.yml └── simple-xgboost │ ├── README.md │ └── simple_xgboost_example.ipynb ├── ops ├── Dockerfile ├── E2E.md ├── gpuci_conda_retry ├── gpuci_mamba_retry └── move_deps.py ├── pyproject.toml ├── qa ├── BENCHMARKS.md ├── L0_e2e │ ├── conftest.py │ ├── generate_example_model.py │ └── test_model.py ├── benchmark_repo │ ├── large_model-cpu │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ ├── large_model │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ ├── small_model-cpu │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ └── small_model │ │ ├── 1 │ │ └── xgboost.json │ │ └── config.pbtxt ├── collate_benchmarks.py ├── entrypoint.sh ├── generate_example_models.sh ├── run-clang-format.py ├── run_benchmarks.sh └── run_tests.sh ├── scripts ├── convert_cuml.py ├── convert_sklearn.py └── environment.yml └── src ├── api.cc ├── cpu_forest_model.h ├── cpu_treeshap_model.h ├── fil_config.h ├── forest_model.h ├── gpu_forest_model.h ├── gpu_treeshap_model.h ├── herring ├── model.hpp ├── node.hpp ├── omp_helpers.hpp ├── output_ops.hpp ├── tl_helpers.hpp ├── tree.hpp └── type_helpers.hpp ├── linear_treeshap_constants.h ├── model.h ├── names.h ├── serialization.h ├── shared_state.h ├── tl_config.h ├── tl_model.h ├── tl_utils.h └── treeshap_model.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | 4 | IndentWidth: 2 5 | ContinuationIndentWidth: 4 6 | UseTab: Never 7 | MaxEmptyLinesToKeep: 2 8 | 9 | SortIncludes: true 10 | CompactNamespaces: true 11 | ReflowComments: true 12 | 13 | DerivePointerAlignment: false 14 | PointerAlignment: Left 15 | 16 | AllowShortIfStatementsOnASingleLine: false 17 | AllowShortBlocksOnASingleLine: false 18 | AllowShortFunctionsOnASingleLine: Inline 19 | 20 | AlwaysBreakAfterReturnType: TopLevelDefinitions 21 | AlignAfterOpenBracket: AlwaysBreak 22 | BreakBeforeBraces: Custom 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: true 28 | AfterNamespace: false 29 | AfterStruct: false 30 | AfterUnion: false 31 | BeforeCatch: true 32 | 33 | BinPackArguments: true 34 | BinPackParameters: true 35 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 36 | 37 | IndentCaseLabels: true -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | build/ 4 | 
ops/*Dockerfile* 5 | qa/L0_e2e/model_repository 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fil_backend/c2fa763d472712737815646ba508d9fa3663ba4a/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: pre-commit 28 | 29 | on: 30 | pull_request: 31 | push: 32 | branches: [main] 33 | 34 | jobs: 35 | pre-commit: 36 | runs-on: ubuntu-22.04 37 | steps: 38 | - uses: actions/checkout@v3 39 | - uses: actions/setup-python@v3 40 | - uses: pre-commit/action@v3.0.0 41 | 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | install/ 3 | *.so 4 | ops/stage 5 | qa/L0_e2e/model_repository 6 | qa/L0_e2e/cpu_model_repository 7 | qa/logs 8 | qa/benchmark_output 9 | *.ipynb_checkpoints* 10 | notebooks/categorical-fraud-detection/ieee-fraud-detection.zip 11 | notebooks/categorical-fraud-detection/sample_submission.csv 12 | notebooks/categorical-fraud-detection/test_identity.csv 13 | notebooks/categorical-fraud-detection/test_transaction.csv 14 | notebooks/categorical-fraud-detection/train_identity.csv 15 | notebooks/categorical-fraud-detection/train_transaction.csv 16 | notebooks/categorical-fraud-detection/model_repository 17 | notebooks/faq/data/ 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | repos: 28 | - repo: https://github.com/timothycrosley/isort 29 | rev: 5.12.0 30 | hooks: 31 | - id: isort 32 | additional_dependencies: [toml] 33 | - repo: https://github.com/psf/black 34 | rev: 23.1.0 35 | hooks: 36 | - id: black 37 | types_or: [python, cython] 38 | - repo: https://github.com/PyCQA/flake8 39 | rev: 5.0.4 40 | hooks: 41 | - id: flake8 42 | args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501] 43 | types_or: [python, cython] 44 | - repo: https://github.com/pre-commit/mirrors-clang-format 45 | rev: v16.0.5 46 | hooks: 47 | - id: clang-format 48 | types_or: [c, c++, cuda, proto, textproto, java] 49 | args: ["-fallback-style=none", "-style=file", "-i"] 50 | - repo: https://github.com/codespell-project/codespell 51 | rev: v2.2.4 52 | hooks: 53 | - id: codespell 54 | additional_dependencies: [tomli] 55 | args: ["--toml", "pyproject.toml"] 56 | exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) 57 | # More details about these pre-commit hooks here: 58 | # https://pre-commit.com/hooks.html 59 | - repo: https://github.com/pre-commit/pre-commit-hooks 60 | rev: v4.4.0 61 | hooks: 62 | - id: check-case-conflict 63 | - id: check-executables-have-shebangs 64 | - id: check-merge-conflict 65 | - id: check-json 66 | - id: check-toml 67 | - id: check-yaml 68 | - id: check-shebang-scripts-are-executable 69 | - id: end-of-file-fixer 70 | types_or: [c, c++, cuda, proto, textproto, java, python] 71 | - id: mixed-line-ending 72 | - id: requirements-txt-fixer 73 | - id: trailing-whitespace 74 | 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Triton FIL backend 2 | 3 | ## How to Contribute 4 | You can help improve the Triton FIL backend in any of the following ways: 5 | - Submitting a bug report, feature request or documentation issue 6 | 
- Proposing and implementing a new feature 7 | - Implementing a feature or bug-fix for an outstanding issue 8 | 9 | ### Bug reports 10 | When submitting a bug report, please include a *minimum* *reproducible* 11 | example. Ideally, this should be a snippet of code that other developers can 12 | copy, paste, and immediately run to try to reproduce the error. Please: 13 | - Do include import statements and any other code necessary to immediately run 14 | your example 15 | - Avoid examples that require other developers to download models or data 16 | unless you cannot reproduce the problem with synthetically-generated data 17 | 18 | ### Code Contributions 19 | To contribute code to this project, please follow these steps: 20 | 1. Find an issue to work on or submit an issue documenting the problem you 21 | would like to work on. 22 | 2. Comment on the issue saying that you plan to work on it. 23 | 3. Review the implementation details section below for information to help you 24 | make your changes in a way that is consistent with the rest of the codebase. 25 | 4. Code! 26 | 5. Create your pull request. 27 | 6. Wait for other developers to review your code and update your PR as needed. 28 | 7. Once a PR is approved, it will be merged into the main branch. 29 | 30 | #### Signing Your Work 31 | * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. 32 | * Any contribution which contains commits that are not Signed-Off will not be accepted. 33 | * To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes: 34 | ```bash 35 | $ git commit -s -m "Add cool feature." 36 | ``` 37 | This will append the following to your commit message: 38 | ``` 39 | Signed-off-by: Your Name 40 | ``` 41 | * Full text of the DCO: 42 | ``` 43 | Developer Certificate of Origin 44 | Version 1.1 45 | 46 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 47 | 1 Letterman Drive 48 | Suite D4700 49 | San Francisco, CA, 94129 50 | 51 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 52 | ``` 53 | ``` 54 | Developer's Certificate of Origin 1.1 55 | 56 | By making a contribution to this project, I certify that: 57 | 58 | (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 59 | 60 | (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 61 | 62 | (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 63 | 64 | (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 
65 | ``` 66 | 67 | ## Developer Workflow Documentation 68 | Additional information useful to contributors is available in the 69 | following sections: 70 | 71 | - [Development workflow](docs/workflow.md) 72 | - [Overview of the repo](docs/repo_overview.md) 73 | - [Build instructions](docs/build.md) 74 | - [Running tests](docs/tests.md) 75 | 76 | ## Style 77 | Contributions to the FIL backend should: 78 | - Adhere to [Almost-Always-Auto](https://herbsutter.com/2013/08/12/gotw-94-solution-aaa-style-almost-always-auto/) style 79 | - Prefer STL algorithms to [raw loops](https://belaycpp.com/2021/06/22/dont-use-raw-loops/) wherever possible 80 | - Use C++ types except where explicitly interfacing with C code (e.g. 81 | `std::size_t` as opposed to `size_t`) 82 | - Avoid depending on transitive includes 83 | -------------------------------------------------------------------------------- /Example_Models.md: -------------------------------------------------------------------------------- 1 | # Generating Example Models 2 | 3 | The FIL backend's testing infrastructure includes [a 4 | script](https://github.com/triton-inference-server/fil_backend/blob/main/qa/L0_e2e/generate_example_model.py) 5 | for generating example models, putting them in the correct directory layout, 6 | and generating an associated config file. This can be helpful both for 7 | providing a template for your own models and for testing your Triton 8 | deployment. 9 | 10 | ## Prerequisites 11 | To use the model generation script, you will need to install 12 | [cuML](https://rapids.ai/start.html#rapids-release-selector) and whatever 13 | forest model framework you wish to use 14 | ([LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html), 15 | [XGBoost](https://xgboost.readthedocs.io/en/latest/install.html), or 16 | [Scikit-Learn](https://scikit-learn.org/stable/install.html)). For convenience, 17 | a Conda environment [config 18 | file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/triton_test.yml) 19 | is included in the FIL backend repo which can be used to install all of these 20 | frameworks: 21 | 22 | ```bash 23 | git clone https://github.com/triton-inference-server/fil_backend.git 24 | cd fil_backend 25 | conda env create -f conda/environments/triton_test.yml 26 | conda activate triton_test 27 | ``` 28 | 29 | ## Usage 30 | 31 | The simplest possible invocation of the example generation script is just: 32 | 33 | ```bash 34 | python qa/L0_e2e/generate_example_model.py 35 | ``` 36 | 37 | This will create an example XGBoost model, serialize it to XGBoost's binary 38 | format and store it (with full configuration) within the 39 | `qa/L0_e2e/model_repository` directory. 40 | 41 | ### Arguments 42 | You can provide additional arguments to the model generation script to control 43 | all details of the generated model. Available arguments are described in the 44 | following sections. 45 | 46 | #### Model framework 47 | - `--type`: Takes one of `lightgbm`, `xgboost`, `sklearn` or `cuml` as argument 48 | and determines what framework will be used to train the model. Defaults to 49 | `xgboost`. 50 | - `--format`: Determines what format to serialize the model to for frameworks 51 | which support multiple serialization formats. One of `xgboost`, 52 | `xgboost_json`, `lightgbm`, or `pickle`. If omitted, this will default to a 53 | valid choice for the chosen framework. 54 | 55 | #### Model metadata 56 | - `--name`: An arbitrary string used to identify the generated model. 
If 57 | omitted, a string will be generated from the model type, serialization 58 | format, and task. 59 | - `--repo`: Path to the directory where you wish to set up your model 60 | repository. This argument is required if this script is invoked outside of 61 | the FIL backend Git repository. If omitted, it will default to 62 | `qa/L0_e2e/model_repository` from the Git repository root. 63 | 64 | #### Model details 65 | - `--task`: One of `classification` or `regression` indicating the type of 66 | inference task for this model. 67 | - `--depth`: The maximum depth for trees in this model. 68 | - `--trees`: The maximum number of trees in this model. 69 | - `--classes`: The number of classes for classification models. 70 | - `--features`: The number of features used for each sample. 71 | - `--samples`: The number of randomly-generated samples to use when training 72 | the example model. 73 | - `--threshold`: The threshold for classification decisions in classifier 74 | models. 75 | - `--predict_proba`: A flag indicating that class scores should be outputted 76 | instead of class IDs for classifiers. 77 | 78 | #### Triton server controls 79 | - `--batching_window`: Maximum time in microseconds for Triton to spend 80 | gathering samples for a single batch 81 | 82 | ### SKLearn and cuML models 83 | Note that this example script generates only the model pickle file for 84 | Scikit-Learn and cuML models. These must be converted to Treelite checkpoints 85 | as described in the [documentation for using these 86 | frameworks](https://github.com/triton-inference-server/fil_backend.git). An 87 | example invocation for Scikit-Learn is shown below: 88 | 89 | ```bash 90 | python qa/L0_e2e/generate_example_model.py --type sklearn --name skl_example 91 | ./scripts/convert_sklearn qa/L0_e2e/model_repository/skl_example/1/model.pkl 92 | ``` 93 | ## Testing example models 94 | Once you have generated an example model (or set up a real model), you can test 95 | it using the `qa/L0_e2e/test_model.py` script. After [starting the 96 | server](https://github.com/triton-inference-server/fil_backend#starting-the-server), 97 | the simplest invocation of this script is just: 98 | ```bash 99 | python qa/L0_e2e/test_model.py --name $NAME_OF_MODEL 100 | ``` 101 | This will run a number of randomly-generated samples through your model both in 102 | Triton and locally. The results will be compared to ensure they are the same. 103 | At the end of the run, some throughput and latency numbers will be printed to 104 | the terminal, but please note that these numbers are **not indicative of 105 | real-world throughput and latency performance**. This script is designed to 106 | rigorously test unlikely corner cases in ways which will hurt reported 107 | performance. The output statistics are provided merely to help catch 108 | performance regressions between different versions or deployments of Triton and 109 | are meaningful only when compared to other test runs with the same parameters. 110 | To get an accurate picture of model throughput and latency, use Triton's [Model 111 | Analyzer](https://github.com/triton-inference-server/model_analyzer) which 112 | includes an easy-to-use tool for meaningfully testing model performance. 113 | 114 | ### Additional arguments 115 | 116 | - `--name`: The name of the model to test. 117 | - `--repo`: The path to the model repository. If this script is not invoked 118 | from within the FIL backend Git repository, this option must be specified. It 119 | defaults to `qa/L0_e2e/model_repository`. 
120 | - `--host`: The URL for the Triton server. Defaults to `localhost`. 121 | - `--http_port`: If using a non-default HTTP port for Triton, the correct port 122 | can be specified here. 123 | - `--grpc_port`: If using a non-default GRPC port for Triton, the correct port 124 | can be specified here. 125 | - `--protocol`: While the test script will do brief tests of both HTTP and 126 | GRPC, the specified protocol will be used for more intensive testing. 127 | - `--samples`: The total number of samples to test for each batch size 128 | provided. Defaults to 8192. 129 | - `--batch_size`: This argument can take an arbitrary number of values. For 130 | each provided value, all samples will be broken down into batches of the 131 | given size and the model will be evaluated against all such batches. 132 | - `--shared_mem`: This argument can take up to two values. These values can be 133 | either `None` or `cuda` to indicate whether the tests should use no shared 134 | memory or CUDA shared memory. If both are given, tests will alternate between 135 | the two. Defaults to both. 136 | - `--concurrency`: The number of concurrent threads to use for generating 137 | requests. Higher values will provide a more rigorous test of the server's 138 | operation when processing many simultaneous requests. 139 | - `--timeout`: The longest to wait for all samples to be processed for a 140 | particular batch size. The appropriate value depends on your hardware, 141 | networking configuration, and total number of samples. 142 | - `--retries`: The number of times to retry requests in order to handle network 143 | failures. 144 | can be specified here. 145 | -------------------------------------------------------------------------------- /SKLearn_and_cuML.md: -------------------------------------------------------------------------------- 1 | # Scikit-Learn and cuML random forest support 2 | This page [has moved](https://github.com/triton-inference-server/fil_backend/blob/main/docs/sklearn_and_cuml.md). 3 | -------------------------------------------------------------------------------- /build_conda_env_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | REPODIR=$(cd $(dirname $0); pwd) 19 | 20 | NUMARGS=$# 21 | ARGS=$* 22 | VALIDTARGETS="conda-dev conda-test" 23 | VALIDFLAGS="-h --help" 24 | VALIDARGS="${VALIDTARGETS} ${VALIDFLAGS}" 25 | HELP="$0 [ ...] 
26 | where is: 27 | conda-dev - build container with dev Conda env 28 | conda-test - build container with test Conda env 29 | and is: 30 | -h - print this text 31 | 32 | The following environment variables are also accepted to allow further customization: 33 | CONDA_DEV_TAG - The tag to use for the image containing dev Conda env 34 | CONDA_TEST_TAG - The tag to use for the image containing test Conda env 35 | " 36 | 37 | export DOCKER_BUILDKIT=1 38 | 39 | function hasArg { 40 | (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") 41 | } 42 | 43 | if hasArg -h || hasArg --help || (( ${NUMARGS} == 0 )) 44 | then 45 | echo "${HELP}" 46 | exit 0 47 | fi 48 | 49 | if [ -z $CONDA_DEV_TAG ] 50 | then 51 | CONDA_DEV_TAG='triton_fil_dev_conda' 52 | fi 53 | if [ -z $CONDA_TEST_TAG ] 54 | then 55 | CONDA_TEST_TAG='triton_fil_test_conda' 56 | fi 57 | 58 | BUILD_CONDA_DEV=0 59 | BUILD_CONDA_TEST=0 60 | if hasArg conda-dev 61 | then 62 | BUILD_CONDA_DEV=1 63 | elif hasArg conda-test 64 | then 65 | BUILD_CONDA_TEST=1 66 | fi 67 | 68 | if [ $BUILD_CONDA_DEV -eq 1 ] 69 | then 70 | docker build \ 71 | $DOCKER_ARGS \ 72 | --target conda-dev \ 73 | -t "$CONDA_DEV_TAG" \ 74 | -f ops/Dockerfile \ 75 | $REPODIR 76 | fi 77 | 78 | if [ $BUILD_CONDA_TEST -eq 1 ] 79 | then 80 | docker build \ 81 | $DOCKER_ARGS \ 82 | --target base-test-install \ 83 | -t "$CONDA_TEST_TAG" \ 84 | -f ops/Dockerfile \ 85 | $REPODIR 86 | fi 87 | -------------------------------------------------------------------------------- /ci/gitlab/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # ENVIRONMENT VARIABLE OPTIONS 6 | # PREBUILT_SERVER_TAG: The tag of the prebuilt Triton server image to test 7 | # PREBUILT_TEST_TAG: The tag of the prebuilt test image to run tests in 8 | # MODEL_BUILDER_IMAGE: A Docker image to be used for training test models 9 | # LOG_DIR: Host directory for storing logs 10 | # NV_DOCKER_ARGS: A bash expression that (when evaluated) returns Docker 11 | # arguments for controlling GPU access 12 | # BUILDPY: 1 to use Triton's build.py script for server build 13 | # CPU_ONLY: 1 to build without GPU support 14 | # NO_CACHE: 0 to enable Docker cache during build 15 | # USE_CLIENT_WHEEL: 1 to install Triton client from wheel for tests 16 | # SDK_IMAGE: If set, copy client wheel from this SDK image 17 | 18 | REPO_DIR=$(cd $(dirname $0)/../../; pwd) 19 | BUILDPY=${BUILDPY:-0} 20 | CPU_ONLY=${CPU_ONLY:-0} 21 | NO_CACHE=${NO_CACHE:-1} 22 | 23 | if [ -z $CI_COMMIT_BRANCH ] 24 | then 25 | export BUILDPY_BRANCH="$CI_COMMIT_BRANCH" 26 | fi 27 | 28 | # Check if test or base images need to be built and do so if necessary 29 | if [ -z $PREBUILT_SERVER_TAG ] 30 | then 31 | export SERVER_TAG=triton_fil 32 | else 33 | export PREBUILT_IMAGE="$PREBUILT_SERVER_TAG" 34 | export SERVER_TAG="$PREBUILT_SERVER_TAG" 35 | fi 36 | [ -z $TRITON_SERVER_REPO_TAG ] || export TRITON_REF="$TRITON_SERVER_REPO_TAG" 37 | [ -z $TRITON_COMMON_REPO_TAG ] || export COMMON_REF="$TRITON_COMMON_REPO_TAG" 38 | [ -z $TRITON_CORE_REPO_TAG ] || export CORE_REF="$TRITON_CORE_REPO_TAG" 39 | [ -z $TRITON_BACKEND_REPO_TAG ] || export BACKEND_REF="$TRITON_BACKEND_REPO_TAG" 40 | 41 | if [ -z $PREBUILT_TEST_TAG ] 42 | then 43 | export TEST_TAG=triton_fil_test 44 | echo "Building Docker images..." 
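# Assemble build.sh flags from the environment-variable options documented at the top of this script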
45 | if [ $BUILDPY -eq 1 ] 46 | then 47 | BUILDARGS='--buildpy' 48 | else 49 | BUILDARGS='' 50 | fi 51 | if [ $CPU_ONLY -eq 1 ] 52 | then 53 | BUILDARGS="$BUILDARGS --cpu-only" 54 | fi 55 | if [ $NO_CACHE -eq 1 ] 56 | then 57 | BUILDARGS="$BUILDARGS --no-cache" 58 | fi 59 | if [ ! -z $SDK_IMAGE ] 60 | then 61 | USE_CLIENT_WHEEL=1 62 | export SDK_IMAGE="${SDK_IMAGE}" 63 | fi 64 | if [ ! -z $USE_CLIENT_WHEEL ] 65 | then 66 | export USE_CLIENT_WHEEL="${USE_CLIENT_WHEEL}" 67 | fi 68 | $REPO_DIR/build.sh $BUILDARGS 69 | else 70 | export TEST_TAG="$PREBUILT_TEST_TAG" 71 | fi 72 | 73 | MODEL_BUILDER_IMAGE=${MODEL_BUILDER_IMAGE:-${TEST_TAG}} 74 | 75 | # Set up directory for logging 76 | if [ -z $LOG_DIR ] 77 | then 78 | LOG_DIR="qa/logs" 79 | else 80 | LOG_DIR="$(readlink -f $LOG_DIR)" 81 | fi 82 | if [ ! -d "${LOG_DIR}" ] 83 | then 84 | mkdir -p "${LOG_DIR}" 85 | fi 86 | 87 | if [ -z "$NV_DOCKER_ARGS" ] 88 | then 89 | if [ -z $CUDA_VISIBLE_DEVICES ] 90 | then 91 | GPU_DOCKER_ARGS='--gpus all' 92 | else 93 | GPU_DOCKER_ARGS='--gpus $CUDA_VISIBLE_DEVICES' 94 | fi 95 | else 96 | GPU_DOCKER_ARGS="$(eval ${NV_DOCKER_ARGS} || echo -n '')" 97 | fi 98 | 99 | if [ ! -z $RUNNER_ID ] 100 | then 101 | DOCKER_ARGS="$DOCKER_ARGS --label RUNNER_ID=${RUNNER_ID}" 102 | fi 103 | 104 | echo "Generating example models..." 105 | # Use 'docker cp' instead of mounting, because we cannot mount directories 106 | # from the GitLab runner due to the "Docker-outside-of-Docker" architecture. 107 | # See https://confluence.nvidia.com/pages/viewpage.action?spaceKey=DL&title=GitLab+Runner 108 | # for more details. 109 | MODEL_BUILDER_INST=model_builder_inst_${CI_JOB_ID} 110 | docker create -t --name ${MODEL_BUILDER_INST} \ 111 | -e RETRAIN=1 \ 112 | -e OWNER_ID=$(id -u) \ 113 | -e OWNER_GID=$(id -g) \ 114 | $GPU_DOCKER_ARGS \ 115 | $DOCKER_ARGS \ 116 | $MODEL_BUILDER_IMAGE \ 117 | bash 118 | docker start ${MODEL_BUILDER_INST} 119 | docker exec ${MODEL_BUILDER_INST} bash -c 'mkdir -p /qa/L0_e2e/ && mkdir -p /qa/logs/' 120 | mkdir -p qa/L0_e2e/model_repository/ 121 | mkdir -p qa/L0_e2e/cpu_model_repository/ 122 | docker cp qa/L0_e2e/model_repository/ ${MODEL_BUILDER_INST}:/qa/L0_e2e/ 123 | docker cp qa/L0_e2e/cpu_model_repository/ ${MODEL_BUILDER_INST}:/qa/L0_e2e/ 124 | 125 | docker exec \ 126 | ${MODEL_BUILDER_INST} \ 127 | bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh' 128 | 129 | docker cp ${MODEL_BUILDER_INST}:/qa/L0_e2e/model_repository/ qa/L0_e2e/ 130 | docker cp ${MODEL_BUILDER_INST}:/qa/L0_e2e/cpu_model_repository/ qa/L0_e2e/ 131 | docker cp ${MODEL_BUILDER_INST}:/qa/logs/. "${LOG_DIR}" 132 | docker stop ${MODEL_BUILDER_INST} 133 | docker rm ${MODEL_BUILDER_INST} 134 | 135 | if [ $CPU_ONLY -eq 1 ] 136 | then 137 | DOCKER_ARGS="${DOCKER_ARGS} -e TRITON_ENABLE_GPU=OFF" 138 | else 139 | DOCKER_ARGS="${DOCKER_ARGS} ${GPU_DOCKER_ARGS}" 140 | fi 141 | 142 | echo "Running tests..." 143 | TEST_INST=test_inst_${CI_JOB_ID} 144 | docker create -t --name ${TEST_INST} \ 145 | -e TEST_PROFILE=ci \ 146 | $DOCKER_ARGS \ 147 | $TEST_TAG \ 148 | bash 149 | docker start ${TEST_INST} 150 | docker exec ${TEST_INST} bash -c 'mkdir -p /qa/L0_e2e/ && mkdir -p /qa/logs/' 151 | docker cp qa/L0_e2e/model_repository/ ${TEST_INST}:/qa/L0_e2e/ 152 | docker cp qa/L0_e2e/cpu_model_repository/ ${TEST_INST}:/qa/L0_e2e/ 153 | docker exec ${TEST_INST} bash -c 'source /conda/test/bin/activate && /qa/entrypoint.sh' 154 | 155 | docker cp ${TEST_INST}:/qa/logs/. 
"${LOG_DIR}" 156 | docker stop ${TEST_INST} 157 | docker rm ${TEST_INST} 158 | -------------------------------------------------------------------------------- /ci/local/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # ENVIRONMENT VARIABLE OPTIONS 6 | # RETRAIN: 1 to force retraining of existing models, 0 to use existing models 7 | # if available 8 | # USE_CLIENT_WHEEL: 1 to install Triton client from wheel for tests 9 | # SDK_IMAGE: If set, copy client wheel from this SDK image 10 | # HOST_BUILD: 1 to build backend lib on host and use it in tests 11 | 12 | REPO_DIR=$(cd $(dirname $0)/../../; pwd) 13 | QA_DIR="${REPO_DIR}/qa" 14 | MODEL_DIR="${QA_DIR}/L0_e2e/model_repository" 15 | CPU_MODEL_DIR="${QA_DIR}/L0_e2e/cpu_model_repository" 16 | HOST_BUILD="${HOST_BUILD:-0}" 17 | TEST_PROFILE="${TEST_PROFILE:-dev}" 18 | 19 | export SERVER_TAG=triton_fil 20 | export TEST_TAG=triton_fil_test 21 | 22 | if [ ! -z $SDK_IMAGE ] 23 | then 24 | export SDK_IMAGE="${SDK_IMAGE}" 25 | USE_CLIENT_WHEEL=1 26 | fi 27 | if [ ! -z $USE_CLIENT_WHEEL ] 28 | then 29 | export USE_CLIENT_WHEEL="${USE_CLIENT_WHEEL}" 30 | fi 31 | 32 | BUILD_ARGS='' 33 | if [ $HOST_BUILD -eq 1 ] 34 | then 35 | BUILD_ARGS="$BUILD_ARGS --host" 36 | fi 37 | 38 | echo "Building Docker images..." 39 | $REPO_DIR/build.sh $BUILD_ARGS 40 | 41 | DOCKER_ARGS="-t -v ${QA_DIR}/logs:/qa/logs" 42 | 43 | if [ -z $CUDA_VISIBLE_DEVICES ] 44 | then 45 | DOCKER_ARGS="$DOCKER_ARGS --gpus all" 46 | else 47 | DOCKER_ARGS="$DOCKER_ARGS --gpus $CUDA_VISIBLE_DEVICES" 48 | fi 49 | 50 | echo "Generating example models..." 51 | docker run \ 52 | -e RETRAIN=${RETRAIN:-0} \ 53 | -e OWNER_ID=$(id -u) \ 54 | -e OWNER_GID=$(id -g) \ 55 | -e TEST_PROFILE=$TEST_PROFILE \ 56 | $DOCKER_ARGS \ 57 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 58 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 59 | --rm $TEST_TAG \ 60 | bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh' 61 | 62 | echo "Running GPU-enabled tests..." 63 | docker run \ 64 | $DOCKER_ARGS \ 65 | -e TEST_PROFILE=$TEST_PROFILE \ 66 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 67 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 68 | --rm $TEST_TAG 69 | 70 | export SERVER_TAG=triton_fil:cpu 71 | export TEST_TAG=triton_fil_test:cpu 72 | 73 | echo "Building CPU-only Docker images..." 74 | $REPO_DIR/build.sh $BUILD_ARGS --cpu-only 75 | 76 | echo "Running CPU-only tests..." 77 | docker run \ 78 | $DOCKER_ARGS \ 79 | -e TRITON_ENABLE_GPU=OFF \ 80 | -e TEST_PROFILE=$TEST_PROFILE \ 81 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 82 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 83 | --rm $TEST_TAG 84 | -------------------------------------------------------------------------------- /cmake/modules/ConfigureCUDA.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2018-2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | if(DISABLE_DEPRECATION_WARNINGS) 18 | list(APPEND RAPIDS_TRITON_CXX_FLAGS -Wno-deprecated-declarations) 19 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) 20 | endif() 21 | 22 | if(CMAKE_COMPILER_IS_GNUCXX) 23 | list(APPEND RAPIDS_TRITON_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) 24 | endif() 25 | 26 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) 27 | 28 | # set warnings as errors 29 | if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) 30 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Werror=all-warnings) 31 | endif() 32 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) 33 | 34 | # Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking 35 | if(CUDA_ENABLE_LINEINFO) 36 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -lineinfo) 37 | endif() 38 | 39 | # Debug options 40 | if(CMAKE_BUILD_TYPE MATCHES Debug) 41 | message(VERBOSE "RAPIDS_TRITON: Building with debugging flags") 42 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -G -Xcompiler=-rdynamic) 43 | endif() 44 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_cuml.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(find_and_configure_cuml) 18 | 19 | set(oneValueArgs VERSION FORK PINNED_TAG USE_TREELITE_STATIC) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | set(CUML_ALGORITHMS "FIL" CACHE STRING "List of algorithms to build in cuml") 24 | list(APPEND CUML_ALGORITHMS "TREESHAP") 25 | 26 | rapids_cpm_find(cuml ${PKG_VERSION} 27 | GLOBAL_TARGETS cuml++ 28 | BUILD_EXPORT_SET rapids_triton-exports 29 | INSTALL_EXPORT_SET rapids_triton-exports 30 | CPM_ARGS 31 | GIT_REPOSITORY https://github.com/${PKG_FORK}/cuml.git 32 | GIT_TAG ${PKG_PINNED_TAG} 33 | SOURCE_SUBDIR cpp 34 | OPTIONS 35 | "BUILD_CUML_C_LIBRARY OFF" 36 | "BUILD_CUML_CPP_LIBRARY ON" 37 | "BUILD_CUML_TESTS OFF" 38 | "BUILD_PRIMS_TESTS OFF" 39 | "BUILD_CUML_MG_TESTS OFF" 40 | "BUILD_CUML_EXAMPLES OFF" 41 | "BUILD_CUML_BENCH OFF" 42 | "BUILD_CUML_PRIMS_BENCH OFF" 43 | "BUILD_CUML_STD_COMMS OFF" 44 | "BUILD_SHARED_LIBS ON" 45 | "CUML_USE_TREELITE_STATIC ${PKG_USE_TREELITE_STATIC}" 46 | "USE_CCACHE ON" 47 | "RAFT_COMPILE_LIBRARIES OFF" 48 | "RAFT_ENABLE_NN_DEPENDENCIES OFF" 49 | ) 50 | 51 | message(VERBOSE "RAPIDS_TRITON: Using CUML located in ${cuml_SOURCE_DIR}") 52 | 53 | endfunction() 54 | 55 | # Change pinned tag here to test a commit in CI 56 | # To use a different RAFT locally, set the CMake variable 57 | # CPM_raft_SOURCE=/path/to/local/raft 58 | find_and_configure_cuml(VERSION 25.04 59 | FORK rapidsai 60 | PINNED_TAG branch-25.04 61 | USE_TREELITE_STATIC ${TRITON_FIL_USE_TREELITE_STATIC} 62 | ) 63 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_gtest.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(find_and_configure_gtest VERSION) 18 | 19 | if(TARGET GTest::gtest) 20 | return() 21 | endif() 22 | 23 | rapids_cpm_find(GTest ${VERSION} 24 | GLOBAL_TARGETS gtest gtest_main GTest::gtest GTest::gtest_main gmock gmock_main 25 | CPM_ARGS 26 | GIT_REPOSITORY https://github.com/google/googletest.git 27 | GIT_TAG release-${VERSION} 28 | GIT_SHALLOW TRUE 29 | OPTIONS "INSTALL_GTEST OFF" 30 | # googletest >= 1.10.0 provides a cmake config file -- use it if it exists 31 | FIND_PACKAGE_ARGUMENTS "CONFIG" 32 | ) 33 | 34 | if(NOT TARGET GTest::gtest) 35 | add_library(GTest::gtest ALIAS gtest) 36 | add_library(GTest::gtest_main ALIAS gtest_main) 37 | endif() 38 | 39 | endfunction() 40 | 41 | set(RAFT_MIN_VERSION_gtest 1.10.0) 42 | 43 | find_and_configure_gtest(${RAFT_MIN_VERSION_gtest}) 44 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_rapids-triton.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_and_configure_rapids_triton) 18 | 19 | set(oneValueArgs VERSION FORK PINNED_TAG) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | rapids_cpm_find(rapids_triton ${PKG_VERSION} 24 | GLOBAL_TARGETS rapids_triton::rapids_triton 25 | BUILD_EXPORT_SET rapids_triton-exports 26 | INSTALL_EXPORT_SET rapids_triton-exports 27 | CPM_ARGS 28 | GIT_REPOSITORY ${PKG_FORK} 29 | GIT_TAG ${PKG_PINNED_TAG} 30 | SOURCE_SUBDIR cpp 31 | OPTIONS 32 | "BUILD_TESTS OFF" 33 | "BUILD_EXAMPLE OFF" 34 | ) 35 | endfunction() 36 | 37 | find_and_configure_rapids_triton(VERSION ${RAPIDS_DEPENDENCIES_VERSION} 38 | FORK ${RAPIDS_TRITON_REPO_PATH} 39 | PINNED_TAG ${RAPIDS_TRITON_REPO_TAG} 40 | ) 41 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_treelite.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021-2022, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_and_configure_treelite) 18 | 19 | set(oneValueArgs VERSION PINNED_TAG BUILD_STATIC_LIBS) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | message(VERBOSE "CUML: In treelite func, static: ${PKG_BUILD_STATIC_LIBS}") 24 | if(NOT PKG_BUILD_STATIC_LIBS) 25 | list(APPEND TREELITE_LIBS treelite::treelite) 26 | else() 27 | list(APPEND TREELITE_LIBS treelite::treelite_static) 28 | endif() 29 | 30 | rapids_cpm_find(Treelite ${PKG_VERSION} 31 | GLOBAL_TARGETS ${TREELITE_LIBS} 32 | INSTALL_EXPORT_SET cuml-exports 33 | CPM_ARGS 34 | GIT_REPOSITORY https://github.com/dmlc/treelite.git 35 | GIT_TAG ${PKG_PINNED_TAG} 36 | OPTIONS 37 | "USE_OPENMP ON" 38 | "Treelite_BUILD_STATIC_LIBS ${PKG_BUILD_STATIC_LIBS}" 39 | ) 40 | 41 | 42 | list(APPEND TREELITE_LIBS_NO_PREFIX treelite) 43 | if(Treelite_ADDED AND PKG_BUILD_STATIC_LIBS) 44 | list(APPEND TREELITE_LIBS_NO_PREFIX treelite_static) 45 | endif() 46 | 47 | set(Treelite_ADDED ${Treelite_ADDED} PARENT_SCOPE) 48 | set(TREELITE_LIBS ${TREELITE_LIBS} PARENT_SCOPE) 49 | 50 | if(Treelite_ADDED) 51 | if (NOT PKG_BUILD_STATIC_LIBS) 52 | target_include_directories(treelite 53 | PUBLIC $ 54 | $) 55 | if(NOT TARGET treelite::treelite) 56 | add_library(treelite::treelite ALIAS treelite) 57 | endif() 58 | else() 59 | target_include_directories(treelite_static 60 | PUBLIC $ 61 | $) 62 | if(NOT TARGET treelite::treelite_static) 63 | add_library(treelite::treelite_static ALIAS treelite_static) 64 | endif() 65 | endif() 66 | 67 | rapids_export(BUILD Treelite 68 | EXPORT_SET TreeliteTargets 69 | GLOBAL_TARGETS ${TREELITE_LIBS_NO_PREFIX} 70 | NAMESPACE treelite::) 71 | endif() 72 | 73 | # We generate the treelite-config files when we built treelite locally, so always do `find_dependency` 74 | rapids_export_package(BUILD Treelite cuml-exports) 75 | 76 | # Tell cmake where it can find the generated treelite-config.cmake we wrote. 
77 | include("${rapids-cmake-dir}/export/find_package_root.cmake") 78 | rapids_export_find_package_root(BUILD Treelite [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports) 79 | endfunction() 80 | 81 | find_and_configure_treelite(VERSION 4.4.1 82 | PINNED_TAG 386bd0de99f5a66584c7e58221ee38ce606ad1ae 83 | BUILD_STATIC_LIBS ${TRITON_FIL_USE_TREELITE_STATIC}) 84 | -------------------------------------------------------------------------------- /conda/environments/buildpy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_buildpy 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - docker-py 7 | - python 8 | - distro 9 | -------------------------------------------------------------------------------- /conda/environments/rapids_triton_dev.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: rapids_triton_dev 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - ccache 7 | - cmake>=4.0 8 | - ninja 9 | - python 10 | # TODO(hcho3): Remove the pin when 11 | # https://github.com/triton-inference-server/common/pull/114 is merged 12 | - rapidjson>=1.1.0,<1.1.0.post* 13 | -------------------------------------------------------------------------------- /conda/environments/triton_benchmark.yml: -------------------------------------------------------------------------------- 1 | name: triton_benchmark 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - cuda-version=12.8 8 | - cudf=25.04 9 | - libcusolver 10 | - libcusparse 11 | - matplotlib 12 | - pip 13 | - python 14 | - scipy 15 | - pip: 16 | - tritonclient[all] 17 | - protobuf 18 | - git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 19 | -------------------------------------------------------------------------------- /conda/environments/triton_test.yml: -------------------------------------------------------------------------------- 1 | name: triton_test 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - aws-sdk-cpp 8 | - clang-tools=19.1.7 9 | - cuda-version=12.8 10 | - cudf=25.04 11 | - cuml=25.04 12 | - flake8 13 | - hypothesis 14 | - lightgbm 15 | - matplotlib 16 | - pip 17 | - pytest 18 | - python 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - treelite>=4.4 22 | - pip: 23 | - tritonclient[all] 24 | - protobuf 25 | - git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 26 | -------------------------------------------------------------------------------- /conda/environments/triton_test_no_client.yml: -------------------------------------------------------------------------------- 1 | name: triton_test 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - aws-sdk-cpp 8 | - clang-tools=19.1.7 9 | - cuda-version=12.8 10 | - cudf=25.04 11 | - cuml=25.04 12 | - flake8 13 | - hypothesis 14 | - lightgbm 15 | - pip 16 | - pytest 17 | - python 18 | - python-rapidjson 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - treelite>=4.4 22 | -------------------------------------------------------------------------------- /docs/build.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Building the FIL Backend 30 | Triton backends are implemented as shared libraries which are conditionally 31 | loaded by the main Triton server process. 
To build the FIL backend shared 32 | library or simply to create a Docker image with a fresh build of the backend, 33 | you may follow the indicated steps. 34 | 35 | **Note**: Most users will not need to build their own copy of the FIL backend. 36 | These instructions are intended for developers and those who wish to make 37 | custom tweaks to the backend. If you are just looking for install instructions, 38 | follow our [installation guide](docs/install.md). 39 | 40 | ## Prerequisites 41 | The FIL backend may be built either using Docker or on the host. We 42 | recommend using the Dockerized build in order to simplify dependency management 43 | unless you have a specific need to build on the host. 44 | 45 | ### Dockerized Build 46 | - [Docker](https://docs.docker.com/get-docker/) 47 | - [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 48 | 49 | ### Host Build 50 | - [CUDA toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) (Only required for GPU-enabled builds) 51 | - [CMake](https://cmake.org/install/) 52 | - [Ninja](https://ninja-build.org/) (Optional but recommended) 53 | Except for the CUDA toolkit, these dependencies can be installed via conda using the provided 54 | [environment 55 | file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml): 56 | 57 | ```bash 58 | conda env create -f conda/environments/rapids_triton_dev.yml 59 | conda activate rapids_triton_dev 60 | ``` 61 | 62 | 63 | ## Using the Build Script 64 | To simplify the build process, the FIL backend provides a `build.sh` script at 65 | the root of the repo. For most use cases, it is sufficient to simply 66 | invoke the script: 67 | 68 | ```bash 69 | ./build.sh 70 | ``` 71 | 72 | This is a lightweight wrapper around a `docker build` command which helps 73 | provide the correct build arguments and variables. By default, it will build 74 | *both* a "server" image which is equivalent to the usual Triton Docker image 75 | and a "test" image whose entrypoint will invoke the FIL backend's tests. 76 | 77 | ### Build Options 78 | The build script uses a number of flags and environment variables to 79 | control the details of what gets built and how. These options are 80 | summarized below: 81 | 82 | #### Flags 83 | - `-g`: Perform a debug build 84 | - `-h`: Print help test for build script 85 | - `--cpu-only`: Build CPU-only version of library 86 | - `--tag-commit`: Tag Docker images using the current git commit 87 | - `--no-cache`: Disable Docker cache for this build 88 | - `--host`: Build on host, **not** in Docker 89 | - `--buildpy`: Invoke Triton's `build.py` script to perform build. 90 | **Note:** This is **not** recommended for end-users. It is included 91 | primarily for testing compatibility with upstream build changes. If you must 92 | invoke this option, you will need the dependencies indicated in the 93 | associated conda [environment file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/buildpy.yml). 
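For example, several of the flags above can be combined in a single invocation. The following sketch (the particular combination is chosen purely for illustration) performs a debug, CPU-only build with the Docker cache disabled and tags the resulting images with the current git commit:

```bash
# Illustrative flag combination: debug, CPU-only build, no Docker cache,
# images tagged with the current git commit
./build.sh -g --cpu-only --no-cache --tag-commit
```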
94 | 95 | #### Environment variables 96 | ##### Standard options 97 | - `BASE_IMAGE`: The base image for Docker images or the build image for 98 | `build.py` if `--buildpy` is invoked 99 | - `TRITON_VERSION`: The version of Triton to use for this build 100 | - `SERVER_TAG`: The tag to use for the server image 101 | - `TEST_TAG`: The tag to use for the test image 102 | - `PREBUILT_IMAGE`: An existing Triton Docker image which you would like to 103 | run tests against. This will build the test image on top of the indicated 104 | image. 105 | - `RAPIDS_VERSION`: The version of RAPIDS to require for RAPIDS 106 | dependencies 107 | ##### Advanced options 108 | - `USE_CLIENT_WHEEL`: If 1, the Triton Python client will be 109 | installed from a wheel distributed in the Triton SDK Docker image. This 110 | option is useful for ARM development, since the Triton client cannot 111 | currently be installed via `pip` for ARM. 112 | - `SDK_IMAGE`: If set, this image will be used to provide the 113 | Python client wheel. Otherwise, if `USE_CLIENT_WHEEL` is set to 1 and this 114 | variable is unset, the image will be selected based on the Triton 115 | version. 116 | - `CONDA_DEV_TAG`: A Docker image containing the development conda 117 | environment. Used primarily to speed up CI; rarely invoked during 118 | development. 119 | - `CONDA_TEST_TAG`: A Docker image containing the test conda 120 | environment. Used primarily to speed up CI; rarely invoked during development 121 | - `TRITON_REF`: The commit ref for the Triton server repo when using 122 | `--buildpy` 123 | - `CORE_REF`: The commit ref for the Triton core repo when using 124 | `--buildpy` 125 | - `COMMON_REF`: The commit ref for the Triton common repo when using 126 | `--buildpy` 127 | - `BACKEND_REF`: The commit ref for the Triton backend repo when using 128 | `--buildpy` 129 | - `THIRDPARTY_REF`: The commit ref for the Triton third-party repo when using 130 | `--buildpy` 131 | - `JOB_ID`: Used for CI builds to uniquely identify a particular 132 | build job. 133 | - `BUILDPY_BRANCH`: Use this branch of the Triton server repo to 134 | provide the `build.py` script if `--buildpy` is used. 135 | - `TREELITE_STATIC`: if set to `ON`, Treelite will be statically linked into the built binaries 136 | -------------------------------------------------------------------------------- /docs/explainability.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Model Explainability with Shapley Values 30 | 31 | **NOTE: The CPU version of this feature is in an experimental state as of version 23.04** 32 | 33 | In addition to providing model output from forest models, the FIL backend 34 | can help you understand *why* the model came to a particular conclusion by 35 | providing Shapley values. Shapley values offer a measure of the extent to 36 | which individual features in an input contributed to the final model output. 37 | Features with high Shapley value scores can generally be understood to be more 38 | important to the model's conclusion than those with lower scores. 39 | 40 | Generally speaking, Shapley values are computed by computing the model output 41 | with and without a particular feature input and looking at how much the output 42 | changed. This is referred to as the marginal contribution of that 43 | feature. 
For a more complete understanding, check out the [Wikipedia 44 | article](https://en.wikipedia.org/wiki/Shapley_value) on Shapley values or 45 | Lloyd Shapley's [original 46 | paper](https://www.rand.org/content/dam/rand/pubs/research_memoranda/2008/RM670.pdf). 47 | 48 | **NOTE: Tree depth is limited to 32 for shapley value computation. Tree models with higher depth will throw an error.** 49 | 50 | ## Using Shapley Values in the FIL Backend 51 | Because it takes additional time to compute and return the relatively large 52 | output arrays for Shapley values, Shapley value computation is turned off by 53 | default in the FIL backend. 54 | 55 | To turn on Shapley Value support, you must add an additional output to the 56 | `config.pbtxt` file for your model as shown below: 57 | ```protobuf 58 | output [ 59 | { 60 | name: "output__0" 61 | data_type: TYPE_FP32 62 | dims: [ 2 ] 63 | }, 64 | { 65 | name: "treeshap_output" 66 | data_type: TYPE_FP32 67 | dims: [ 501 ] 68 | } 69 | ] 70 | backend: "fil" 71 | max_batch_size: 32768 72 | input [ 73 | { 74 | name: "input__0" 75 | data_type: TYPE_FP32 76 | dims: [ $NUM_FEATURES ] 77 | } 78 | ] 79 | output [ 80 | { 81 | name: "output__0" 82 | data_type: TYPE_FP32 83 | dims: [ 1 ] 84 | }, 85 | { 86 | name: "treeshap_output" 87 | data_type: TYPE_FP32 88 | dims: [ $NUM_FEATURES_PLUS_ONE ] 89 | } 90 | ] 91 | instance_group [{ kind: KIND_AUTO }] 92 | parameters [ 93 | { 94 | key: "model_type" 95 | value: { string_value: "$MODEL_TYPE" } 96 | }, 97 | { 98 | key: "output_class" 99 | value: { string_value: "$IS_A_CLASSIFIER" } 100 | } 101 | ] 102 | 103 | dynamic_batching {} 104 | ``` 105 | Note that the length of the `treeshap_output` is equal to the number of input 106 | features plus one to account for the bias term in the Shapley output. For a 107 | working example of model deployment with Shapley values, including how to 108 | retrieve those values using Triton's Python client, check out the [FAQ 109 | Notebook](https://nbviewer.org/github/triton-inference-server/fil_backend/blob/main/notebooks/faq/FAQs.ipynb#$\color{#76b900}{\text{FAQ-12:-How-do-I-retrieve-Shapley-values-for-model-explainability?}}$) 110 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Installation 30 | The FIL backend is a part of Triton and can be installed via the methods 31 | described in the [main Triton 32 | documentation](https://github.com/triton-inference-server/server#build-and-deploy). 33 | To quickly get up and running with a Triton Docker image, follow these 34 | steps. 35 | 36 | **Note**: Looking for instructions to *build* the FIL backend yourself? Check out our [build 37 | guide](build.md). 38 | 39 | ## Prerequisites 40 | - [Docker](https://docs.docker.com/get-docker/) 41 | - [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 42 | 43 | ## Getting the container 44 | Triton containers are available from NGC and may be pulled down via 45 | 46 | ```bash 47 | docker pull nvcr.io/nvidia/tritonserver:22.10-py3 48 | ``` 49 | 50 | Note that the FIL backend cannot be used in the `21.06` version of this 51 | container; the `21.06.1` patch release is the earliest Triton version with a 52 | working FIL backend implementation. 
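If you would like to confirm that the image you pulled actually includes the FIL backend before going further, you can list the backend directories bundled in the container. This is just a quick sanity check; the path below follows the standard Triton container layout, and the tag should match whichever release you pulled:

```bash
# List the bundled backends; the output should include a `fil` entry
docker run --rm nvcr.io/nvidia/tritonserver:22.10-py3 ls /opt/tritonserver/backends
```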
53 | 54 | ## Starting the container 55 | In order to actually deploy a model, you will need to provide the serialized 56 | model and configuration file in a specially-structured directory called the 57 | "model repository." Check out the 58 | [configuration guide](docs/model_config.md) for details on how to do this for your model. 59 | 60 | Assuming your model repository is on the host system, you can 61 | bind-mount it into the container and start the server via the following 62 | command: 63 | ``` 64 | docker run --gpus all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${MODEL_REPO}:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.11-py3 tritonserver --model-repository=/models 65 | ``` 66 | Remember that bind-mounts **require an absolute path** to the host 67 | directory, so `${MODEL_REPO}` should be replaced by the absolute path to the 68 | model repository directory on the host. 69 | 70 | Assuming you started your container with the name "tritonserver" as in the 71 | above snippet, you can bring the server down again and remove the 72 | container with: 73 | ``` 74 | docker rm -f tritonserver 75 | ``` 76 | -------------------------------------------------------------------------------- /docs/model_support.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Model Support and Limitations 30 | The FIL backend is designed to accelerate inference for **tree-based models**. 31 | If the model you are trying to deploy is not tree-based, consider using one of 32 | Triton's other backends. 33 | 34 | ## Frameworks 35 | The FIL backend supports most XGBoost and LightGBM models using their native 36 | serialization formats. The FIL backend also supports the following model types 37 | from [Scikit-Learn and cuML](sklearn_and_cuml.md) using Treelite's checkpoint serialization format: 38 | 39 | - GradientBoostingClassifier 40 | - GradientBoostingRegressor 41 | - IsolationForest 42 | - RandomForestRegressor 43 | - ExtraTreesClassifier 44 | - ExtraTreesRegressor 45 | 46 | In addition, the FIL backend can perform inference on tree models from any 47 | framework if they are first exported to Treelite's checkpoint serialization 48 | format. 49 | 50 | ## Serialization Formats 51 | The FIL backend currently supports the following serialization formats: 52 | 53 | - XGBoost JSON 54 | - XGBoost UBJSON 55 | - XGBoost Binary 56 | - LightGBM Text 57 | - Treelite binary checkpoint 58 | 59 | The FIL backend does **not** support direct ingestion of Pickle files. The 60 | pickled model must be converted to one of the above formats before it can be 61 | used in Triton. 62 | 63 | ## Version Compatibility 64 | Until version 3.0 of Treelite, Treelite offered no backward compatibility 65 | for its checkpoint format even among minor releases. Therefore, the version 66 | of Treelite used to save a checkpoint had to exactly match the version used in 67 | the FIL backend. Starting with version 3.0, Treelite supports checkpoint 68 | output from any version of Treelite starting with 2.7 until the next major 69 | release. 70 | 71 | XGBoost's JSON format also changes periodically between minor versions, and 72 | older versions of Treelite used in the FIL backend may not support those 73 | changes. 
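When in doubt about which format your training code produced, it is often simplest to re-save the model explicitly in one of the supported formats. The snippet below is an illustrative sketch using the XGBoost Python API on toy data; the file name `xgboost.json` matches the default name used for the `xgboost_json` model type elsewhere in this repository.

```python
import numpy as np
import xgboost as xgb

# Toy data purely for illustration.
X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(np.int32)

booster = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y))

# XGBoost chooses the serialization format from the file extension:
# ".json" writes XGBoost JSON and ".ubj" writes XGBoost UBJSON, both of
# which the FIL backend accepts (subject to the version matrix below).
booster.save_model("xgboost.json")
```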
74 | 75 | The compatibility matrix for Treelite and XGBoost with the FIL backend is 76 | shown below: 77 | 78 | | Triton Version | Supported Treelite Version(s) | Supported XGBoost models | 79 | | -------------- | ----------------------------- | -------------------------------------- | 80 | | 21.08 | 1.3.0 | XGBoost JSON <1.6 | 81 | | 21.09-21.10 | 2.0.0 | XGBoost JSON <1.6 | 82 | | 21.11-22.02 | 2.1.0 | XGBoost JSON <1.6 | 83 | | 22.03-22.06 | 2.3.0 | XGBoost JSON <1.6 | 84 | | 22.07 | 2.4.0 | XGBoost JSON <1.7 | 85 | | 22.08-24.02 | 2.4.0; >=3.0.0,<4.0.0 | XGBoost JSON <1.7 | 86 | | 24.03+ | 3.9.0; >=4.0.0,<5.0.0 | XGBoost JSON 1.7+ | 87 | | 24.10+ | 3.9.0; >=4.0.0,<5.0.0 | XGBoost JSON 1.7+, XGBoost UBJSON 2.1+ | 88 | 89 | ## Limitations 90 | The FIL backend currently does not support any multi-output regression models. 91 | 92 | ## Double-Precision Support 93 | While the FIL backend can load double-precision models, it performs all 94 | computations in single-precision mode. This can lead to slight differences in 95 | model output for frameworks like LightGBM which natively use double precision. 96 | Support for double-precision execution is planned for an upcoming release. 97 | 98 | ## Categorical Feature Support 99 | As of version 21.11, the FIL backend includes support for models with 100 | categorical features (e.g. some 101 | [XGBoost](https://xgboost.readthedocs.io/en/stable/tutorials/categorical.html) and [LightGBM ](https://lightgbm.readthedocs.io/en/latest/Advanced-Topics.html#categorical-feature-support)) models. 102 | These models can be deployed just like any other model, but it is worth 103 | remembering that (as with any other inference pipeline which includes 104 | categorical features), care must be taken to ensure that the categorical 105 | encoding used during inference matches that used during training. If the data 106 | passed through at inference time does not contain all of the categories used 107 | during training, there is no way to reconstruct the correct mapping of 108 | features, so some record must be made of the complete set of categories used 109 | during training. With that record, categorical columns can be appropriately 110 | converted to float32 columns, and submitted to Triton as with any other input. 111 | 112 | For a fully-worked example of using a model with categorical features, check 113 | out the [introductory fraud detection notebook](https://nbviewer.org/github/triton-inference-server/fil_backend/blob/main/notebooks/categorical-fraud-detection/Fraud_Detection_Example.ipynb). 114 | -------------------------------------------------------------------------------- /docs/repo_overview.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Repo Overview 30 | 31 | The FIL backend repo is organized in the following directories: 32 | 33 | ## `ci` 34 | This directory contains scripts and configuration files for working with CI. 35 | Developers may invoke `ci/local/build.sh` to build and run tests locally or 36 | `ci/gitlab/build.sh` to more precisely mirror the test environment run in 37 | official CI. This directory is not intended for end users. 38 | 39 | ## `cmake` 40 | This directory contains CMake files required for the build, especially those 41 | which are used to retrieve external dependencies. It is not intended for 42 | end users. 
43 | 44 | ## `conda` 45 | This directory contains conda-related infrastructure including environment yaml 46 | files used to construct build and test environments: 47 | 48 | - `conda/environments/buildpy.yml`: Minimal environment for using Triton's 49 | `build.py` build script 50 | - `conda/environments/rapids_triton_dev.yml`: Environment for building the FIL 51 | backend 52 | - `conda/environments/triton_benchmark.yml`: Environment for running the FIL 53 | backend's standard benchmarks 54 | - `conda/environments/triton_test_no_client.yml`: Environment for running tests 55 | for the FIL backend. This file does not include Triton's Python client to 56 | facilitate testing on ARM machines, where the client cannot be correctly 57 | installed via pip. 58 | - `conda/environments/triton_test.yml`: Environment for running tests for the 59 | FIL backend that includes Triton's Python client. Recommended environment for 60 | those wishing to run tests outside of Docker. 61 | 62 | ## `docs` 63 | This directory contains markdown files for documentation. 64 | 65 | ## `notebooks` 66 | This directory contains example Jupyter notebooks for using the FIL backend. 67 | 68 | ## `ops` 69 | This directory contains files used for build-related tasks including the 70 | Dockerfile for the FIL backend's dockerized build. It is not intended for end 71 | users. 72 | 73 | ## `qa` 74 | This directory contains files for running tests and benchmarks. It is not 75 | intended for end users. 76 | 77 | ## `scripts` 78 | This directory contains utility scripts for e.g. converting models to Treelite 79 | checkpoint format. It also contains a conda environment file indicating the 80 | necessary dependencies for running these scripts. 81 | 82 | ## `src` 83 | This directory contains the C++ source files for the FIL backend. It is not 84 | intended for end users. 85 | -------------------------------------------------------------------------------- /docs/sklearn_and_cuml.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Scikit-Learn and cuML Support 30 | 31 | **NOTE:** Due to a change in Scikit-Learn 1.2.0, forest models from version 32 | 1.2.0 and later are not currently supported. Support will be added in an 33 | upcoming release of Triton. 34 | 35 | ## Model Serialization 36 | 37 | While LightGBM and XGBoost have their own serialization formats that are 38 | directly supported by the Triton FIL backend, tree models trained with 39 | [Scikit-Learn](https://scikit-learn.org/stable/modules/model_persistence.html) 40 | or [cuML](https://docs.rapids.ai/api/cuml/stable/pickling_cuml_models.html) are 41 | generally serialized using Python's 42 | [pickle](https://docs.python.org/3/library/pickle.html) module. In order to 43 | avoid a round-trip through Python in Triton, the FIL backend instead requires 44 | that these pickled models first be converted to Treelite's binary checkpoint 45 | format. Note that this also allows you to make use of *any* Treelite-supported 46 | model framework in Triton simply by exporting to the binary checkpoint format. 47 | 48 | The FIL backend repo includes scripts for easy conversion from 49 | pickle-serialized cuML or Scikit-Learn models to Treelite checkpoints. 
You can 50 | download the relevant script for Scikit-Learn 51 | [here](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/convert_sklearn.py) 52 | and for cuML 53 | [here](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/convert_cuml.py). 54 | 55 | ## Prerequisites 56 | 57 | To use the Scikit-Learn conversion script, you must run it from within a Python 58 | environment containing both 59 | [Scikit-Learn](https://scikit-learn.org/stable/install.html) and 60 | [Treelite](https://treelite.readthedocs.io/en/latest/install.html). To use the 61 | cuML conversion script, you must run it from within a Python environment 62 | containing [cuML](https://rapids.ai/start.html). 63 | 64 | For convenience, a conda environment config file 65 | [is provided](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/environment.yml) 66 | which will install all three of these prerequisites: 67 | 68 | ``` 69 | conda env create -f scripts/environment.yml 70 | conda activate triton_scripts 71 | ``` 72 | 73 | ## Converting to Treelite checkpoints 74 | 75 | **NOTE:** The following steps are **not** necessary for LightGBM or XGBoost 76 | models. The FIL backend supports the native serialization formats for these 77 | frameworks directly. 78 | 79 | If you already have a Scikit-Learn or cuML RF model saved as a pickle file 80 | (`model.pkl`), place it in a directory structure as follows: 81 | 82 | ``` 83 | model_repository/ 84 | `-- fil 85 | |-- 1 86 | | `-- model.pkl 87 | `-- config.pbtxt 88 | ``` 89 | 90 | Then perform the conversion by running either: 91 | ```bash 92 | ./convert_sklearn.py model_repository/fil/1/model.pkl 93 | ``` 94 | for Scikit-Learn models or 95 | ```bash 96 | ./convert_cuml.py model_repository/fil/1/model.pkl 97 | ``` 98 | for cuML models. This will generate a `checkpoint.tl` file in the model 99 | repository in the necessary location. You can then proceed as with any other 100 | model type, setting the `model_type` parameter in `config.pbtxt` to 101 | `"treelite_checkpoint"`. 102 | 103 | Note that Treelite did not guarantee compatibility between minor release 104 | versions for its binary checkpoint model until version 3.0.0 and does not 105 | guarantee compatibility between major releases, so it is recommended that you 106 | keep the original pickle file. If you later make use of a newer version of 107 | Treelite, you can simple re-run the conversion on this pickle file. 108 | -------------------------------------------------------------------------------- /docs/tests.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Running Tests 30 | 31 | For developers working on the FIL backend, the easiest way to run tests is to 32 | invoke the `ci/local/build.sh` script, which will build the server image 33 | and a test image then run a container based on that image which runs the 34 | complete test suite. 35 | 36 | One of the most time-consuming parts of running the test suite is 37 | training the end-to-end test models. The `ci/local/build.sh` script will 38 | cache trained models between runs in `qa/L0_e2e/model_repository` and 39 | `qa/L0_e2e/cpu_model_repository`. Sometimes, you may make a change which 40 | invalidates previously generated models. In such cases, you can clear these 41 | directories in order to start fresh. 
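For example, to discard the cached test models and rebuild them from scratch, you might run something like the following from the repository root (a minimal sketch; the paths are the cache locations mentioned above):

```bash
# Remove cached test models so the next run regenerates them.
rm -rf qa/L0_e2e/model_repository qa/L0_e2e/cpu_model_repository

# Re-run the local build-and-test script.
./ci/local/build.sh
```

The `RETRAIN` variable described below serves a similar purpose without deleting the directories by hand.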
42 | 43 | The `ci/local/build.sh` script uses the following environment variables to 44 | control build and execution of tests: 45 | 46 | - `RETRAIN`: If set to 1, retrain test models. 47 | - `USE_CLIENT_WHEEL`: If set to 1, install the Triton client from a wheel 48 | copied from Triton's SDK image. This is useful for testing on ARM 49 | machines, where the Triton Python client is not available via pip. 50 | - `SDK_IMAGE`: If set, copy the Triton client wheel from this specific Docker 51 | SDK image 52 | - `HOST_BUILD`: Build on the host rather than via Docker. This can be useful 53 | for rapid iteration during development. 54 | - `TEST_PROFILE`: Either "dev" or "ci". This variable supplies the name of the 55 | Hypothesis testing profile to use when running tests. The "ci" profile 56 | runs more examples while the "dev" profile executes more quickly. Default 57 | is "dev". 58 | 59 | ## The CI Test Script 60 | In addition to `ci/local/build.sh`, the repo contains a 61 | `ci/gitlab/build.sh` script which is used to run tests in CI. It is 62 | sometimes useful to invoke this script to more closely replicate the CI 63 | environment. This script does *not* cache models in between runs and will 64 | generally run more and slower tests than those used for the `local` script. 65 | 66 | The `ci/gitlab/build.sh` script uses the following environment variables 67 | to control build and execution of tests: 68 | 69 | - `PREBUILT_SERVER_TAG`: Use this Docker image as the Triton server image 70 | to test rather than building it. 71 | - `PREBUILT_TEST_TAG`: Use this Docker image as the Triton test image rather 72 | than building it on top of the server image. 73 | - `MODEL_BUILDER_IMAGE`: Use this Docker image to train test models rather 74 | than building an image. 75 | - `LOG_DIR`: A host directory used for storing test logs 76 | - `NV_DOCKER_ARGS`: A bash expression that when evaluated returns Docker 77 | arguments used for controlling GPU access in CI 78 | - `BUILDPY`: If set to 1, build with Triton's `build.py` script rather than 79 | the FIL backend Dockerfile. 80 | - `CPU_ONLY`: If set to 1, build without GPU support. 81 | - `NO_CACHE`: Set to 0 to enable Docker cache. **By default, caching is 82 | disabled.** 83 | - `USE_CLIENT_WHEEL`: If set to 1, install the Triton client from a wheel 84 | copied from Triton's SDK image. This is useful for testing on ARM 85 | machines, where the Triton Python client is not available via pip. 86 | - `SDK_IMAGE`: If set, copy the Triton client wheel from this specific Docker 87 | SDK image 88 | 89 | ## Running Tests Manually 90 | It is *strongly* recommended that you use the provided test scripts for running 91 | tests. If you wish to run tests manually, you must generate test models using 92 | the `qa/generate_example_models.sh` script, start the Triton server against 93 | the generated model repository, and then run `pytest --repo qa/L0_e2e/model_repository qa/L0_e2e`. 94 | 95 | This approach is not an officially supported testing method, and minimal 96 | support will be provided for it. If you find it useful and wish to 97 | contribute documentation to make this method easier, pull requests are 98 | welcome. 99 | -------------------------------------------------------------------------------- /docs/workflow.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Development Workflow 30 | The `ci/local/build.sh` script is intended to help automate build and testing 31 | during development. 
Usually it is sufficient to make a change and then run 32 | this script without arguments to validate the change. 33 | 34 | For tasks which require frequent rebuilds, it is sometimes slightly faster 35 | to build on the host rather than in the container. In this case, running 36 | `HOST_BUILD=1 ./ci/local/build.sh` using the [rapids\_triton\_dev conda environment](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml) will perform the build of the FIL backend on the host but then use Docker to execute the tests in a controlled environment. 37 | 38 | For complete information on other options that can be used with this script, 39 | see the [documentation on running tests](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml). 40 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # FIL Backend Examples 30 | 31 | This directory contains example notebooks which illustrate typical workflows 32 | and use-cases for the Triton FIL backend. Additional examples will be added to 33 | this directory over time. 34 | 35 | Each subdirectory contains an example notebook and a README with instructions 36 | on how to run the example. 37 | 38 | ## Current Examples 39 | - [Categorical Fraud 40 | Example](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection): 41 | This introductory example walks through training a categorical XGBoost model for fraud 42 | detection and deploying it on both GPU-accelerated and CPU-only systems. 43 | - [FAQ 44 | Notebook](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/faq): 45 | This notebook answers a series of frequently asked questions around the FIL 46 | backend for Triton and offers example code with practical applications of 47 | those answers. 48 | 49 | ## Deprecated Examples 50 | - [Simple 51 | XGBoost](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/simple-xgboost): 52 | This example has been superseded by the Categorical Fraud Example, which 53 | offers a more succinct and up-to-date example of how to train and deploy an 54 | XGBoost model. 55 | -------------------------------------------------------------------------------- /notebooks/categorical-fraud-detection/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Fraud Detection With Categorical XGBoost 30 | 31 | This example notebook shows how to train and deploy an XGBoost model 32 | with categorical features in Triton using the FIL backend. We begin by training 33 | two separate models on a fraud detection dataset with categorical variables: 34 | one small model designed to maximize runtime performance and one larger model 35 | designed to maximize accurate and precise detection of fraud. We then deploy 36 | both models on CPU and GPU and compare their performance using Triton's 37 | `perf_analyzer`. Based on these results, we see that GPU deployment opens up 38 | the possibility of deploying a much larger and more accurate fraud model with 39 | higher throughput while also keeping to a tight latency budget. 40 | 41 | ## Running the notebook 42 | In order to launch the Triton server, you will need 43 | [Docker](https://docs.docker.com/get-docker/) installed on your system. 
The 44 | rest of the notebook also requires a few Python dependencies. To easily install 45 | these additional dependencies, you may make use of the provided conda 46 | [environment 47 | file](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection/environment.yml) 48 | as follows: 49 | ```bash 50 | conda env create -f environment.yml 51 | ``` 52 | You may then activate the conda environment and run the notebook as usual: 53 | ```bash 54 | conda activate triton_example 55 | jupyter notebook 56 | ``` 57 | The Jupyter interface should now be accessible from a browser, and you can 58 | follow the instructions within the notebook itself from there. 59 | -------------------------------------------------------------------------------- /notebooks/categorical-fraud-detection/environment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_example 3 | channels: 4 | - conda-forge 5 | - nvidia 6 | - rapidsai 7 | dependencies: 8 | - cudatoolkit=12.8 9 | - cudf=25.04 10 | - cuml=25.04 11 | - cupy 12 | - jupyter 13 | - kaggle 14 | - matplotlib 15 | - numpy 16 | - pandas 17 | - pip 18 | - python 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - pip: 22 | - tritonclient[all] 23 | - protobuf 24 | -------------------------------------------------------------------------------- /notebooks/faq/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # FAQs and Advanced Features 30 | 31 | Designed as a complete reference to features of the FIL backend and common 32 | tasks performed with it, this notebook provides answers to a series of FAQs 33 | along with code snippets demonstrating how to make practical use of those 34 | answers. 35 | 36 | If you have never made use of the FIL backend before, it is recommended that 37 | you begin with the introductory [fraud detection notebook](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection#fraud-detection-with-categorical-xgboost). After working through this basic example, the FAQs notebook will offer answers to questions that go beyond the basics in order to get the most out of the FIL backend. 38 | 39 | ## Running the notebook 40 | In order to launch the Triton server, you will need 41 | [Docker](https://docs.docker.com/get-docker/) installed on your system. The 42 | rest of the notebook also requires a few Python dependencies. To easily install 43 | these additional dependencies, you may make use of the provided conda 44 | [environment 45 | file](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/faq/environment.yml) 46 | as follows: 47 | ```bash 48 | conda env create -f environment.yml 49 | ``` 50 | You may then activate the conda environment and run the notebook as usual: 51 | ```bash 52 | conda activate triton_faq_nb 53 | jupyter notebook 54 | ``` 55 | The Jupyter interface should now be accessible from a browser, and you can 56 | follow the instructions within the notebook itself from there. 57 | 58 | Note that depending on which model framework you choose to use with this 59 | notebook, you may not need all the dependencies listed in the conda environment 60 | file. Remove any that you do not wish to install before installing the 61 | environment. 
62 | -------------------------------------------------------------------------------- /notebooks/faq/environment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_faq_nb 3 | channels: 4 | - conda-forge 5 | - nvidia 6 | - rapidsai 7 | dependencies: 8 | - cudatoolkit=12.8 9 | - cuml=25.04 10 | - joblib 11 | - jupyter 12 | - lightgbm 13 | - numpy 14 | - pandas 15 | - pip 16 | - python 17 | - skl2onnx 18 | - treelite>=4.4 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - pip: 22 | - protobuf 23 | - tritonclient[all] 24 | -------------------------------------------------------------------------------- /notebooks/simple-xgboost/README.md: -------------------------------------------------------------------------------- 1 | # (DEPRECATED) Triton FIL backend with XGBoost 2 | 3 | **THIS NOTEBOOK HAS BEEN DEPRECATED. FOR A SIMPLE AND CONCISE INTRODUCTION TO TRAINING AND DEPLOYING AN XGBOOST MODEL WITH THE FIL BACKEND, PLEASE SEE THE [CATEGORICAL FRAUD DETECTION](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection) EXAMPLE NOTEBOOK.** 4 | 5 | This notebook will eventually be reworked, split into smaller parts, and reintroduced for a later release. It is left here for historical reference, but some cells are known not to work with the latest versions of various Triton components. 6 | 7 | This notebook is a reference for deploying an XGBoost model on Triton with the FIL backend. The notebook explains how one can deploy XGBoost model in Triton, check deployment status and send inference requests, set concurrent model execution and dynamic batching and find the best deployment configuration using Model Analyzer. 8 | 9 | ## Requirements 10 | * NVIDIA GPU (Pascal+ required, recommended GPUs: T4, V100 or A100) 11 | * [Latest NVIDIA driver](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html) 12 | * [Docker](https://docs.docker.com/get-docker/) 13 | * [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 14 | 15 | ## Run the Triton Inference Server container 16 | 17 | **Note:** Due to a bug in release 21.07, Triton's `model_analyzer` cannot be used with the FIL backend. If you wish to use the model analyzer, please use release 21.08 or later. 18 | 19 | Before running the container, clone the repository and then run the container: 20 | 21 | ``` 22 | git clone https://github.com/triton-inference-server/fil_backend.git 23 | cd fil_backend 24 | 25 | docker run \ 26 | -it \ 27 | --gpus=all \ 28 | --rm \ 29 | --net=host \ 30 | --name triton_fil \ 31 | nvcr.io/nvidia/tritonserver: # Put the appropriate tag here. 32 | ``` 33 | 34 | **Note:** The artifacts created by scripts inside the container are created with root permission. The user on host machine might not be able to modify the artifacts once the container exists. To avoid this issue, copy the notebook `docker cp simple_xgboost_example.ipynb ` and create the artifacts inside the container. 
35 | 36 | Now open up another terminal and copy the notebook from host into the container as follows: 37 | ``` 38 | docker cp notebooks/ triton_fil:/ 39 | ``` 40 | 41 | ## Starting Jupyter notebook 42 | In the previous terminal perform the following steps: 43 | 44 | ### Install Jupyter notebook inside the Triton container 45 | ``` 46 | pip3 install jupyter 47 | ``` 48 | ### Run Jupyter notebook inside the Triton container 49 | Change directory to `/notebooks` folder and run the jupyter notebook: 50 | ``` 51 | cd /notebooks 52 | jupyter notebook --allow-root --no-browser --port 7001 53 | ``` 54 | 55 | -------------------------------------------------------------------------------- /ops/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.3 2 | ########################################################################################### 3 | # Arguments for controlling build details 4 | ########################################################################################### 5 | # Version of Triton to use 6 | ARG TRITON_VERSION=25.05 7 | # Base container image 8 | ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 9 | # Whether or not to enable GPU build 10 | ARG TRITON_ENABLE_GPU=ON 11 | # A Triton server image to use as base for test layers (skip actual build) 12 | ARG SERVER_IMAGE=build-stage 13 | # Whether or not to install Triton client from wheel in SDK image 14 | ARG USE_CLIENT_WHEEL=0 15 | # SDK container image (only used if USE_CLIENT_WHEEL==1) 16 | ARG SDK_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-sdk 17 | # Whether or not to use backend library prebuilt on host 18 | ARG USE_HOST_LIB=0 19 | 20 | FROM condaforge/miniforge3 as conda-base 21 | COPY ./ops/gpuci_conda_retry /usr/bin/gpuci_conda_retry 22 | COPY ./ops/gpuci_mamba_retry /usr/bin/gpuci_mamba_retry 23 | RUN chmod +x /usr/bin/gpuci_conda_retry /usr/bin/gpuci_mamba_retry 24 | 25 | RUN mkdir /conda 26 | RUN gpuci_mamba_retry install -c conda-forge conda-pack=0.7 27 | 28 | FROM conda-base as conda-dev 29 | COPY ./conda/environments/rapids_triton_dev.yml /conda/environment.yml 30 | RUN gpuci_mamba_retry create -n rapids_triton_dev \ 31 | && gpuci_mamba_retry env update -n rapids_triton_dev -f /conda/environment.yml \ 32 | && rm /conda/environment.yml 33 | RUN conda-pack -n rapids_triton_dev -o /tmp/env.tar \ 34 | && mkdir /conda/dev/ \ 35 | && cd /conda/dev/ \ 36 | && tar xf /tmp/env.tar \ 37 | && rm /tmp/env.tar 38 | RUN /conda/dev/bin/conda-unpack 39 | 40 | # Stage for installing test dependencies 41 | FROM conda-base as base-test-install 42 | COPY ./conda/environments/triton_test_no_client.yml /environment.yml 43 | 44 | RUN gpuci_mamba_retry create -n triton_test \ 45 | && gpuci_mamba_retry env update -n triton_test -f /environment.yml \ 46 | && rm /environment.yml 47 | 48 | FROM base-test-install as wheel-install-0 49 | RUN apt-get update \ 50 | && apt-get install --no-install-recommends -y \ 51 | build-essential \ 52 | ca-certificates \ 53 | git \ 54 | && apt-get clean \ 55 | && rm -rf /var/lib/apt/lists/* \ 56 | && conda run --no-capture-output -n triton_test pip install tritonclient[all] 57 | 58 | FROM ${SDK_IMAGE} as sdk-image 59 | 60 | FROM base-test-install as wheel-install-1 61 | COPY --from=sdk-image /workspace/install/python /sdk_install 62 | RUN conda run --no-capture-output -n triton_test \ 63 | pip install /sdk_install/tritonclient*manylinux*.whl \ 64 | && rm -r /sdk_install 65 | 66 | FROM 
wheel-install-${USE_CLIENT_WHEEL} as conda-test 67 | RUN conda run --no-capture-output -n triton_test \ 68 | pip install git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 69 | RUN conda-pack --ignore-missing-files -n triton_test -o /tmp/env.tar \ 70 | && mkdir /conda/test/ \ 71 | && cd /conda/test/ \ 72 | && tar xf /tmp/env.tar \ 73 | && rm /tmp/env.tar 74 | RUN /conda/test/bin/conda-unpack 75 | 76 | 77 | FROM ${BASE_IMAGE} as base 78 | 79 | ENV PATH="/root/miniconda3/bin:${PATH}" 80 | 81 | # In CI, CPU base image may not have curl, but it also does not need to update 82 | # the cuda keys 83 | RUN if command -v curl; \ 84 | then [ $(uname -m) = 'x86_64' ] \ 85 | && curl -L -o /tmp/cuda-keyring.deb \ 86 | https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ 87 | || curl -L -o /tmp/cuda-keyring.deb \ 88 | https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-keyring_1.0-1_all.deb; \ 89 | dpkg -i /tmp/cuda-keyring.deb \ 90 | && rm /tmp/cuda-keyring.deb; fi 91 | 92 | RUN apt-get update \ 93 | && apt-get install --no-install-recommends -y \ 94 | build-essential \ 95 | ca-certificates \ 96 | git \ 97 | && apt-get clean \ 98 | && rm -rf /var/lib/apt/lists/* 99 | 100 | # Stage immediately before building; useful for build iteration 101 | FROM base as build-prep 102 | 103 | RUN mkdir -p /rapids_triton/build /rapids_triton/install 104 | 105 | COPY ./src /rapids_triton/src 106 | COPY ./CMakeLists.txt /rapids_triton 107 | COPY ./cmake /rapids_triton/cmake 108 | 109 | ARG BACKEND_NAME=fil 110 | ENV BACKEND_NAME=$BACKEND_NAME 111 | 112 | WORKDIR /rapids_triton/build 113 | 114 | # Remove potentially stale build artifacts 115 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 116 | then \ 117 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 118 | else \ 119 | mkdir -p /opt/tritonserver/backends/${BACKEND_NAME}; \ 120 | fi 121 | 122 | # Stage where build actually takes place 123 | FROM build-prep as build-stage 124 | 125 | ARG TRITON_VERSION 126 | ENV TRITON_VERSION=$TRITON_VERSION 127 | 128 | ARG BUILD_TYPE=Release 129 | ENV BUILD_TYPE=$BUILD_TYPE 130 | ARG BUILD_TESTS 131 | ENV BUILD_TESTS=$BUILD_TESTS 132 | ARG BUILD_EXAMPLE 133 | ENV BUILD_EXAMPLE=$BUILD_EXAMPLE 134 | 135 | ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server 136 | ENV TRITON_REPO_ORGANIZATION=$TRITON_REPO_ORGANIZATION 137 | ARG TRITON_CORE_REPO_TAG=r${TRITON_VERSION} 138 | ENV TRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG 139 | ARG TRITON_COMMON_REPO_TAG=r${TRITON_VERSION} 140 | ENV TRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG 141 | ARG TRITON_BACKEND_REPO_TAG=r${TRITON_VERSION} 142 | ENV TRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG 143 | ARG RAPIDS_TRITON_REPO_TAG=main 144 | ENV RAPIDS_TRITON_REPO_TAG=$RAPIDS_TRITON_REPO_TAG 145 | ARG RAPIDS_TRITON_REPO_PATH=https://github.com/rapidsai/rapids-triton.git 146 | ENV RAPIDS_TRITON_REPO_PATH=$RAPIDS_TRITON_REPO_PATH 147 | 148 | ARG TRITON_ENABLE_GPU=ON 149 | ENV TRITON_ENABLE_GPU=$TRITON_ENABLE_GPU 150 | ARG TRITON_ENABLE_STATS=ON 151 | ENV TRITON_ENABLE_GPU=$TRITON_ENABLE_GPU 152 | 153 | # Specify *minimum* version for all RAPIDS dependencies 154 | # Some RAPIDS deps may have later versions 155 | ARG RAPIDS_DEPENDENCIES_VERSION=25.06 156 | ENV RAPIDS_DEPENDENCIES_VERSION=$RAPIDS_DEPENDENCIES_VERSION 157 | 158 | ARG TRITON_FIL_USE_TREELITE_STATIC=ON 159 | ENV TRITON_FIL_USE_TREELITE_STATIC=$TRITON_FIL_USE_TREELITE_STATIC 160 | 161 | COPY 
--from=conda-dev /conda/dev /conda/dev 162 | 163 | SHELL ["/bin/bash", "-c"] 164 | 165 | RUN source /conda/dev/bin/activate \ 166 | && cmake \ 167 | --log-level=VERBOSE \ 168 | -GNinja \ 169 | -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \ 170 | -DBUILD_TESTS="${BUILD_TESTS}" \ 171 | -DTRITON_REPO_ORGANIZATION="${TRITON_REPO_ORGANIZATION}" \ 172 | -DTRITON_CORE_REPO_TAG="${TRITON_CORE_REPO_TAG}" \ 173 | -DTRITON_COMMON_REPO_TAG="${TRITON_COMMON_REPO_TAG}" \ 174 | -DTRITON_BACKEND_REPO_TAG="${TRITON_BACKEND_REPO_TAG}" \ 175 | -DRAPIDS_TRITON_REPO_TAG="${RAPIDS_TRITON_REPO_TAG}" \ 176 | -DRAPIDS_TRITON_REPO_PATH="${RAPIDS_TRITON_REPO_PATH}" \ 177 | -DTRITON_ENABLE_GPU="${TRITON_ENABLE_GPU}" \ 178 | -DTRITON_ENABLE_STATS="${TRITON_ENABLE_STATS}" \ 179 | -DRAPIDS_DEPENDENCIES_VERSION="${RAPIDS_DEPENDENCIES_VERSION}" \ 180 | -DTRITON_FIL_USE_TREELITE_STATIC="${TRITON_FIL_USE_TREELITE_STATIC}" \ 181 | -DCMAKE_INSTALL_PREFIX=/rapids_triton/install \ 182 | ..; 183 | 184 | ENV CCACHE_DIR=/ccache 185 | 186 | ARG CCACHE_REMOTE_STORAGE 187 | 188 | RUN --mount=type=cache,target=/ccache/ source /conda/dev/bin/activate && \ 189 | if [ -n "${CCACHE_REMOTE_STORAGE}" ] && which ccache ; then \ 190 | ccache --set-config=remote_only=true ; \ 191 | ccache --set-config=remote_storage=${CCACHE_REMOTE_STORAGE} ; \ 192 | ccache -p ; \ 193 | fi && \ 194 | ninja install 195 | 196 | # Stage for generating testing image 197 | FROM ${SERVER_IMAGE} as test-host-0 198 | FROM ${SERVER_IMAGE} as test-host-1 199 | 200 | ARG BACKEND_NAME=fil 201 | ENV BACKEND_NAME=$BACKEND_NAME 202 | 203 | # Remove existing FIL backend install 204 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 205 | then \ 206 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 207 | fi 208 | COPY ./install/backends/fil /opt/tritonserver/backends/${BACKEND_NAME} 209 | 210 | FROM test-host-${USE_HOST_LIB} as test-build 211 | 212 | FROM ${SERVER_IMAGE} as test-stage 213 | ARG BACKEND_NAME=fil 214 | ENV BACKEND_NAME=$BACKEND_NAME 215 | 216 | COPY --from=conda-test /conda/test /conda/test 217 | 218 | # Remove existing FIL backend install 219 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 220 | then \ 221 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 222 | fi 223 | COPY --from=test-build \ 224 | /opt/tritonserver/backends/$BACKEND_NAME \ 225 | /opt/tritonserver/backends/$BACKEND_NAME 226 | 227 | COPY qa /qa 228 | COPY scripts /scripts 229 | 230 | ENTRYPOINT [] 231 | CMD ["/bin/bash", "-c", "source /conda/test/bin/activate && /qa/entrypoint.sh"] 232 | 233 | FROM ${BASE_IMAGE} as final 234 | 235 | ARG BACKEND_NAME=fil 236 | ENV BACKEND_NAME=$BACKEND_NAME 237 | 238 | RUN mkdir /models 239 | 240 | # Remove existing FIL backend install 241 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 242 | then \ 243 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 244 | fi 245 | 246 | COPY --from=build-stage \ 247 | /opt/tritonserver/backends/$BACKEND_NAME \ 248 | /opt/tritonserver/backends/$BACKEND_NAME 249 | -------------------------------------------------------------------------------- /ops/E2E.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Customized End-to-End Builds 30 | 31 | **This build option has been removed in version 21.11. It may be re-introduced 32 | at a later date. 
Please file an issue if you have need for greater build 33 | customization than is provided by standard build options.** 34 | -------------------------------------------------------------------------------- /ops/gpuci_conda_retry: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script taken from the RAPIDS gpuci_tools repo: 3 | # https://github.com/rapidsai/gpuci-tools/blob/215d652dbef6f35c13597812f38058e82520b8e5/tools/gpuci_conda_retry 4 | # 5 | # gpuci_conda_retry 6 | # 7 | # wrapper for conda that retries the command after a CondaHTTPError, 8 | # ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that 9 | # is normally resolved by retrying) 10 | # 11 | # This must be set in order for the script to recognize failing exit codes when 12 | # output is piped to tee 13 | # 14 | # Example usage: 15 | # $ gpuci_conda_retry install cudatoolkit=11.0 rapids=0.16 16 | # 17 | # Configurable options are set using the following env vars: 18 | # 19 | # GPUCI_CONDA_RETRY_MAX - set to a positive integer to set the max number of retry 20 | # attempts (attempts after the initial try). 21 | # Default is 3 retries 22 | # 23 | # GPUCI_CONDA_RETRY_SLEEP - set to a positive integer to set the duration, in 24 | # seconds, to wait between retries. 25 | # Default is a 10 second sleep 26 | # 27 | set -o pipefail 28 | 29 | condaretry_help=" 30 | gpuci_conda_retry options: 31 | 32 | --condaretry_max_retries=n Retry the conda command at most n times (default is 3) 33 | --condaretry_sleep_interval=n Sleep n seconds between retries (default is 5) 34 | 35 | ALSO gpuci_conda_retry options can be set using the following env vars: 36 | 37 | GPUCI_CONDA_RETRY_MAX - set to a positive integer to set the max number of retry 38 | attempts (attempts after the initial try). 39 | Default is 3 retries 40 | 41 | GPUCI_CONDA_RETRY_SLEEP - set to a positive integer to set the duration, in 42 | seconds, to wait between retries. 43 | Default is a 10 second sleep 44 | ========== 45 | " 46 | max_retries=${GPUCI_CONDA_RETRY_MAX:=3} 47 | sleep_interval=${GPUCI_CONDA_RETRY_SLEEP:=10} 48 | exitcode=0 49 | needToRetry=0 50 | retries=0 51 | args="" 52 | 53 | # Temporarily set this to something else (eg. a script called "testConda" that 54 | # prints "CondaHTTPError:" and exits with 1) for testing this script. 55 | #condaCmd=./testConda 56 | condaCmd=${CONDA_EXE:=conda} 57 | 58 | # Function to output messages to stderr 59 | # FIXME - extend `gpuci_logger` or make another script for this 60 | function echo_stderr { 61 | echo " [gpuci_conda_retry] $@" >&2 62 | } 63 | 64 | # Function to run conda and check output for specific retryable errors 65 | # input variables: 66 | # condaCmd: the command used for running conda, which accepts the args 67 | # passed to this script 68 | # outfile: file to tee output to for checking, likely a temp file 69 | # output variables: 70 | # exitcode: the exit code from running ${condaCmd} ${args} 71 | # needToRetry: 1 if the command should be retried, 0 if it should not be 72 | function runConda { 73 | ${condaCmd} ${args} 2>&1| tee ${outfile} 74 | exitcode=$? 75 | needToRetry=0 76 | retryingMsg="" 77 | 78 | if (( ${exitcode} != 0 )); then 79 | # Show exit code 80 | echo_stderr "conda returned exit code: ${exitcode}" 81 | 82 | if grep -q CondaHTTPError: ${outfile}; then 83 | retryingMsg="Retrying, found 'CondaHTTPError:' in output..." 
84 | needToRetry=1 85 | elif grep -q ChecksumMismatchError: ${outfile}; then 86 | retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..." 87 | needToRetry=1 88 | elif grep -q JSONDecodeError: ${outfile}; then 89 | retryingMsg="Retrying, found 'JSONDecodeError:' in output..." 90 | needToRetry=1 91 | elif grep -q ChunkedEncodingError: ${outfile}; then 92 | retryingMsg="Retrying, found 'ChunkedEncodingError:' in output..." 93 | needToRetry=1 94 | elif grep -q CondaMultiError: ${outfile}; then 95 | retryingMsg="Retrying, found 'CondaMultiError:' in output..." 96 | needToRetry=1 97 | elif grep -q EOFError: ${outfile}; then 98 | retryingMsg="Retrying, found 'EOFError:' in output..." 99 | needToRetry=1 100 | else 101 | echo_stderr "Exiting, no retryable conda errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:' or 'ChunkedEncodingError:' or 'CondaMultiError:' or 'EOFError:'" 102 | fi 103 | 104 | if (( ${needToRetry} == 1 )) && \ 105 | (( ${retries} >= ${max_retries} )); then 106 | # Catch instance where we run out of retries 107 | echo_stderr "Exiting, reached max retries..." 108 | else 109 | # Give reason for retry 110 | echo_stderr $retryingMsg 111 | fi 112 | fi 113 | } 114 | 115 | 116 | # Process and remove args recognized only by this script, save others for conda 117 | # Process help separately 118 | for arg in $*; do 119 | opt=${arg%%=*} 120 | val=${arg##*=} 121 | if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then 122 | echo "${condaretry_help}" 123 | ${condaCmd} --help 124 | exit $? 125 | elif [[ ${opt} == "--condaretry_max_retries" ]]; then 126 | max_retries=${val} 127 | elif [[ ${opt} == "--condaretry_sleep_interval" ]]; then 128 | sleep_interval=${val} 129 | else 130 | args="${args} ${arg}" 131 | fi 132 | done 133 | 134 | # Run command 135 | outfile=$(mktemp) 136 | runConda ${args} 137 | 138 | # Retry loop, only if needed 139 | while (( ${needToRetry} == 1 )) && \ 140 | (( ${retries} < ${max_retries} )); do 141 | 142 | retries=$(expr ${retries} + 1) 143 | echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." 144 | sleep ${sleep_interval} 145 | echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." 146 | 147 | runConda ${args} 148 | done 149 | 150 | rm -f ${outfile} 151 | exit ${exitcode} 152 | -------------------------------------------------------------------------------- /ops/gpuci_mamba_retry: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script taken from the RAPIDS gpuci_tools repo: 3 | # https://github.com/rapidsai/gpuci-tools/blob/215d652dbef6f35c13597812f38058e82520b8e5/tools/gpuci_mamba_retry 4 | # 5 | # gpuci_mamba_retry 6 | # 7 | # Wrapper for conda that retries the command after a CondaHTTPError, 8 | # ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that 9 | # is normally resolved by retrying) 10 | # 11 | # This must be set in order for the script to recognize failing exit codes when 12 | # output is piped to tee 13 | # 14 | # Example usage: 15 | # $ gpuci_mamba_retry install cudatoolkit=11.0 rapids=0.16 16 | # 17 | # Configurable options are set using the following env vars: 18 | # 19 | # GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry 20 | # attempts (attempts after the initial try). 21 | # Default is 3 retries 22 | # 23 | # GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in 24 | # seconds, to wait between retries. 
25 | # Default is a 10 second sleep 26 | # 27 | set -o pipefail 28 | 29 | mambaretry_help=" 30 | gpuci_mamba_retry options: 31 | 32 | --mambaretry_max_retries=n Retry the conda command at most n times (default is 3) 33 | --mambaretry_sleep_interval=n Sleep n seconds between retries (default is 5) 34 | 35 | ALSO gpuci_mamba_retry options can be set using the following env vars: 36 | 37 | GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry 38 | attempts (attempts after the initial try). 39 | Default is 3 retries 40 | 41 | GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in 42 | seconds, to wait between retries. 43 | Default is a 10 second sleep 44 | ========== 45 | " 46 | max_retries=${GPUCI_MAMBA_RETRY_MAX:=3} 47 | sleep_interval=${GPUCI_MAMBA_RETRY_SLEEP:=10} 48 | exitcode=0 49 | needToRetry=0 50 | retries=0 51 | args="" 52 | 53 | # Temporarily set this to something else (eg. a script called "testConda" that 54 | # prints "CondaHTTPError:" and exits with 1) for testing this script. 55 | #mambaCmd=./testConda 56 | mambaCmd=${MAMBA_BIN:=mamba} 57 | 58 | # Function to output messages to stderr 59 | # FIXME - extend `gpuci_logger` or make another script for this 60 | function echo_stderr { 61 | echo " [gpuci_mamba_retry] $@" >&2 62 | } 63 | 64 | # Function to run conda and check output for specific retryable errors 65 | # input variables: 66 | # mambaCmd: the command used for running conda, which accepts the args 67 | # passed to this script 68 | # outfile: file to tee output to for checking, likely a temp file 69 | # output variables: 70 | # exitcode: the exit code from running ${mambaCmd} ${args} 71 | # needToRetry: 1 if the command should be retried, 0 if it should not be 72 | function runMamba { 73 | ${mambaCmd} ${args} 2>&1| tee ${outfile} 74 | exitcode=$? 75 | needToRetry=0 76 | retryingMsg="" 77 | 78 | if (( ${exitcode} != 0 )); then 79 | # Show exit code 80 | echo_stderr "Failed, mamba returned exit code: ${exitcode}" 81 | 82 | if grep -q CondaHTTPError: ${outfile}; then 83 | retryingMsg="Retrying, found 'CondaHTTPError:' in output..." 84 | needToRetry=1 85 | elif grep -q ChecksumMismatchError: ${outfile}; then 86 | retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..." 87 | needToRetry=1 88 | elif grep -q JSONDecodeError: ${outfile}; then 89 | retryingMsg="Retrying, found 'JSONDecodeError:' in output..." 90 | needToRetry=1 91 | elif grep -q EOFError: ${outfile}; then 92 | retryingMsg="Retrying, found 'EOFError:' in output..." 93 | needToRetry=1 94 | else 95 | echo_stderr "Exiting, no retryable mamba errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:' or 'EOFError:'" 96 | fi 97 | 98 | if (( ${needToRetry} == 1 )) && \ 99 | (( ${retries} >= ${max_retries} )); then 100 | # Catch instance where we run out of retries 101 | echo_stderr "Exiting, reached max retries..." 102 | else 103 | # Give reason for retry 104 | echo_stderr $retryingMsg 105 | fi 106 | fi 107 | } 108 | 109 | 110 | # Process and remove args recognized only by this script, save others for conda 111 | # Process help separately 112 | for arg in $*; do 113 | opt=${arg%%=*} 114 | val=${arg##*=} 115 | if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then 116 | echo "${mambaretry_help}" 117 | ${mambaCmd} --help 118 | exit $? 
119 | elif [[ ${opt} == "--mambaretry_max_retries" ]]; then 120 | max_retries=${val} 121 | elif [[ ${opt} == "--mambaretry_sleep_interval" ]]; then 122 | sleep_interval=${val} 123 | else 124 | args="${args} ${arg}" 125 | fi 126 | done 127 | 128 | # Run command 129 | outfile=$(mktemp) 130 | runMamba ${args} 131 | 132 | # Retry loop, only if needed 133 | while (( ${needToRetry} == 1 )) && \ 134 | (( ${retries} < ${max_retries} )); do 135 | 136 | retries=$(expr ${retries} + 1) 137 | echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." 138 | sleep ${sleep_interval} 139 | echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." 140 | 141 | runMamba ${args} 142 | done 143 | 144 | rm -f ${outfile} 145 | exit ${exitcode} 146 | -------------------------------------------------------------------------------- /ops/move_deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import shutil 6 | import subprocess 7 | from pathlib import Path 8 | 9 | MISSING_REGEX = re.compile(r"\n\t(.+)\ =>\ not\ found") 10 | FOUND_REGEX = re.compile(r"\n\t(.+)\ =>\ (.+)\ (\(0[xX][0-9a-fA-F]+\))") 11 | 12 | 13 | def ldd(path): 14 | """Get output of ldd for given file""" 15 | ldd_out = subprocess.run(["ldd", path], check=True, capture_output=True, text=True) 16 | return ldd_out.stdout 17 | 18 | 19 | def get_missing_deps(ldd_output): 20 | """Return iterator of missing dependencies in ldd output""" 21 | for match in MISSING_REGEX.finditer(ldd_output): 22 | yield match.group(1) 23 | 24 | 25 | def path_contains(parent, child): 26 | """Check if first path contains the child path""" 27 | parent = os.path.abspath(parent) 28 | child = os.path.abspath(child) 29 | return parent == os.path.commonpath([parent, child]) 30 | 31 | 32 | def get_deps_map(ldd_output, required_dir=None): 33 | """Return dictionary mapping library names to paths""" 34 | deps_map = {} 35 | for match in FOUND_REGEX.finditer(ldd_output): 36 | if required_dir is None or path_contains(required_dir, match.group(2)): 37 | deps_map[match.group(1)] = match.group(2) 38 | return deps_map 39 | 40 | 41 | def move_dependencies(): 42 | """Move FIL backend dependencies from conda build environment to install 43 | directory 44 | 45 | The FIL backend library is built within a a conda environment containing 46 | all required shared libraries for deploying the backend. This function 47 | analyzes ldd output to determine what libraries FIL links against in its 48 | build environment as well as what libraries will be missing in the final 49 | install location. It then moves missing libraries to the final install 50 | location and repeats the analysis until it has satisfied as many missing 51 | dependencies as possible. 
52 | """ 53 | fil_lib = os.getenv("FIL_LIB", "libtriton_fil.so") 54 | lib_dir = os.getenv("LIB_DIR", "/usr/lib") 55 | 56 | conda_lib_dir = os.getenv("CONDA_LIB_DIR") 57 | if conda_lib_dir is None: 58 | conda_prefix = os.getenv("CONDA_PREFIX") 59 | if conda_prefix is None: 60 | raise RuntimeError( 61 | "Must set CONDA_LIB_DIR to conda environment lib directory" 62 | ) 63 | conda_lib_dir = os.path.join(conda_prefix, "lib") 64 | 65 | Path(lib_dir).mkdir(parents=True, exist_ok=True) 66 | 67 | # Set RUNPATH to conda lib directory to determine locations of 68 | # conda-provided dependencies 69 | subprocess.run(["patchelf", "--set-rpath", conda_lib_dir, fil_lib], check=True) 70 | 71 | ldd_out = ldd(fil_lib) 72 | expected_missing = set(get_missing_deps(ldd_out)) 73 | deps_map = get_deps_map(ldd_out, required_dir=conda_lib_dir) 74 | 75 | # Set RUNPATH to final dependency directory 76 | subprocess.run(["patchelf", "--set-rpath", lib_dir, fil_lib], check=True) 77 | 78 | prev_missing = { 79 | None, 80 | } 81 | cur_missing = set() 82 | while prev_missing != cur_missing: 83 | prev_missing = cur_missing 84 | cur_missing = set(get_missing_deps(ldd(fil_lib))) 85 | for missing_dep in cur_missing: 86 | try: 87 | lib_path = deps_map[missing_dep] 88 | except KeyError: 89 | continue 90 | shutil.copy(lib_path, lib_dir) 91 | 92 | remaining = cur_missing - expected_missing 93 | if remaining != {}: 94 | print("Could not find the following dependencies:") 95 | for lib in sorted(remaining): 96 | print(lib) 97 | else: 98 | print("All dependencies found") 99 | 100 | 101 | if __name__ == "__main__": 102 | move_dependencies() 103 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | [tool.codespell] 28 | # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - 29 | # this is only to allow you to run codespell interactively 30 | skip = "./.git,./.github" 31 | # ignore short words, and typename parameters like OffsetT 32 | ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" 33 | # use the 'clear' dictionary for unambiguous spelling mistakes 34 | builtin = "clear" 35 | # disable warnings about binary files and wrong encoding 36 | quiet-level = 3 37 | 38 | [tool.isort] 39 | profile = "black" 40 | use_parentheses = true 41 | multi_line_output = 3 42 | include_trailing_comma = true 43 | force_grid_wrap = 0 44 | ensure_newline_before_comments = true 45 | line_length = 88 46 | balanced_wrapping = true 47 | indent = " " 48 | skip = ["build"] 49 | 50 | -------------------------------------------------------------------------------- /qa/BENCHMARKS.md: -------------------------------------------------------------------------------- 1 | # FIL Backend Benchmarks 2 | 3 | **WARNING: The models which were used for this benchmarking script have 4 | temporarily been removed. They will be restored using a storage solution other 5 | than Git LFS at a later date.** 6 | 7 | In order to facilitate performance analysis during development of the FIL 8 | backend, the `qa/run_benchmarks.sh` scripts can run a simple set of benchmarks 9 | against standard models. To run this script, first install the benchmarking 10 | conda environment: 11 | ```bash 12 | conda env create -f conda/environments/triton_benchmark.yml 13 | ``` 14 | 15 | Next, start the Triton server with the provided benchmark models. Note that you 16 | will need [git lfs](https://git-lfs.github.com/) to checkout these models. You 17 | may start the server by running the following command from the repo root: 18 | 19 | ```bash 20 | docker run \ 21 | --rm \ 22 | --gpus=all \ 23 | --name benchmark_server \ 24 | -p 8000:8000 \ 25 | -p 8001:8001 \ 26 | -p 8002:8002 \ 27 | -v $PWD/qa/benchmark_repo:/models \ 28 | triton_fil \ 29 | tritonserver \ 30 | --model-repository=/models 31 | ``` 32 | 33 | Here, `triton_fil` is used as the Docker image, since this is the standard tag 34 | used during development, but you may run the benchmarks against any Triton 35 | image which contains the FIL backend. 36 | 37 | In a separate terminal, you may now invoke the benchmark script itself as 38 | follows: 39 | ```bash 40 | conda activate triton_benchmark 41 | ./qa/run_benchmarks.sh 42 | ``` 43 | 44 | The benchmark script will provide output in the `qa/benchmark_output` 45 | directory. Each model tested will have its own directory with `.csv` files 46 | representing results for various batch sizes. The `summary` directory will also 47 | contain a `.csv` collating the data from each run as well as a `.png` showing 48 | throughput vs. p99 latency for all tested models on a single graph. 
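For example, a run restricted to the two standard models with a 10 ms latency ceiling might look like the following; the values shown are purely illustrative, and the variables themselves are described in the list that follows:

```bash
# Hypothetical configuration; see the variable descriptions below.
MODELS='small_model large_model' \
BATCHES='1 16 128' \
MAX_LATENCY=10 \
./qa/run_benchmarks.sh
```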
49 | 50 | The benchmark script can be configured using a few different environment 51 | variables, summarized below: 52 | - `MODELS`: A space-separated list of the models to benchmark (defaults to 53 | standard benchmarking models) 54 | - `BATCHES`: A space-separated list of the batch sizes to use during 55 | benchmarking (defaults to `'1 16 128 1024'`) 56 | - `MAX_LATENCY`: The maximum latency (in ms) to explore during benchmarking 57 | (defaults to 5 ms) 58 | -------------------------------------------------------------------------------- /qa/L0_e2e/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from hypothesis import settings 4 | 5 | settings.register_profile("dev", max_examples=10) 6 | settings.register_profile("ci", max_examples=100) 7 | 8 | 9 | def pytest_addoption(parser): 10 | default_repo_path = os.path.join( 11 | os.path.dirname(os.path.abspath(__file__)), "model_repository" 12 | ) 13 | parser.addoption("--repo", action="store", default=default_repo_path) 14 | -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model-cpu/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../large.json -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model-cpu/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_CPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../large.json -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_GPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model-cpu/1/xgboost.json: 
-------------------------------------------------------------------------------- 1 | ../../small.json -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model-cpu/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_CPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../small.json -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_GPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/collate_benchmarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import re 4 | import sys 5 | 6 | import cudf 7 | import numpy as np 8 | from scipy.spatial import ConvexHull 9 | 10 | try: 11 | import matplotlib.pyplot as plt 12 | except ImportError: 13 | plt = None 14 | 15 | BATCH_FILE_RE = re.compile(r"([0-9]+)\.csv") 16 | SUMMARY_DIR_NAME = "summary" 17 | 18 | 19 | def gather_perf_reports(benchmark_dir): 20 | _, model_dirs, _ = next(os.walk(benchmark_dir)) 21 | for model in model_dirs: 22 | if model != SUMMARY_DIR_NAME: 23 | model_dir = os.path.join(benchmark_dir, model) 24 | for file_ in os.listdir(model_dir): 25 | file_match = BATCH_FILE_RE.match(file_) 26 | if file_match: 27 | batch = int(file_match.groups()[0]) 28 | data = cudf.read_csv(os.path.join(model_dir, file_)) 29 | yield model, batch, data 30 | 31 | 32 | def collate_raw_data(benchmark_dir): 33 | all_data = [] 34 | for model, batch, data in gather_perf_reports(benchmark_dir): 35 | annotations = cudf.DataFrame( 36 | {"Model": [model] * data.shape[0], "Batch Size": [batch] * data.shape[0]}, 37 | columns=("Model", "Batch Size"), 38 | ) 39 | all_data.append(cudf.concat([annotations, data], axis=1)) 40 
| return cudf.concat(all_data, axis=0, ignore_index=True) 41 | 42 | 43 | def pts_to_line(pt1, pt2): 44 | slope = (pt2[1] - pt1[1]) / (pt2[0] - pt1[0]) 45 | intercept = pt1[1] - slope * pt1[0] 46 | return (slope, intercept) 47 | 48 | 49 | def scatter_to_hull(pts): 50 | hull = ConvexHull(pts) 51 | pts = pts[hull.vertices] 52 | pts = pts[pts[:, 0].argsort(), :] 53 | slope, intercept = pts_to_line(pts[0, :], pts[-1, :]) 54 | filtered_pts = pts[pts[:, 1] >= slope * pts[:, 0] + intercept] 55 | return np.concatenate((pts[(0,), :], filtered_pts, pts[(-1,), :])) 56 | 57 | 58 | def plot_lat_tp(data, latency_percentile=99): 59 | all_models = data["Model"].unique().to_pandas() 60 | plt.xscale("log") 61 | plt.yscale("log") 62 | for model in all_models: 63 | model_data = raw_data.loc[data["Model"] == model].to_pandas() 64 | hull = scatter_to_hull( 65 | model_data[[f"p{latency_percentile} latency", "Inferences/Second"]].values 66 | ) 67 | plt.plot(hull[:, 0], hull[:, 1], "-", label=model) 68 | plt.title("Throughput vs. Latency (log-log)") 69 | plt.xlabel("p99 Latency (microseconds)") 70 | plt.ylabel("Throughput (samples/s)") 71 | plt.legend(all_models) 72 | 73 | 74 | def plot_througput(data, budget, output_dir): 75 | filtered_data = data[data["p99 latency"] <= budget][["Model", "Inferences/Second"]] 76 | maximums = filtered_data.groupby("Model").max() 77 | maximums.sort_index(inplace=True) 78 | 79 | budget_ms = round(budget / 1000) 80 | 81 | raw_data.to_csv(os.path.join(output_dir, f"{budget_ms}.csv")) 82 | 83 | if plt is not None: 84 | plt.bar(maximums.index.values_host, maximums["Inferences/Second"].values_host) 85 | plt.xticks(rotation=90) 86 | plt.title(f"Throughput for p99 latency budget of {budget_ms} ms") 87 | plt.subplots_adjust(bottom=0.35) 88 | plt.savefig(os.path.join(output_dir, f"{budget_ms}.png")) 89 | plt.close() 90 | 91 | 92 | if __name__ == "__main__": 93 | benchmark_dir = sys.argv[1] 94 | raw_data = collate_raw_data(benchmark_dir) 95 | summary_dir = os.path.join(benchmark_dir, SUMMARY_DIR_NAME) 96 | throughput_dir = os.path.join(summary_dir, "throughput") 97 | os.makedirs(throughput_dir, exist_ok=True) 98 | raw_data.to_csv(os.path.join(summary_dir, "raw_data.csv")) 99 | 100 | try: 101 | latency_cutoff = float(os.environ["MAX_LATENCY"]) 102 | raw_data = raw_data[raw_data["p99 latency"] <= (latency_cutoff * 1000)] 103 | except KeyError: 104 | pass # No latency cutoff specified 105 | 106 | raw_data.to_csv(os.path.join(summary_dir, "filtered_data.csv")) 107 | 108 | plot_througput(raw_data, 1000, throughput_dir) 109 | plot_througput(raw_data, 5000, throughput_dir) 110 | plot_througput(raw_data, 20000, throughput_dir) 111 | 112 | if plt is not None: 113 | plot_lat_tp(raw_data) 114 | plt.savefig(os.path.join(summary_dir, "latency_throughput.png")) 115 | -------------------------------------------------------------------------------- /qa/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | QA_DIR=$(cd $(dirname $0); pwd) 19 | TEST_SCRIPT="$QA_DIR/run_tests.sh" 20 | 21 | if [[ $TRITON_ENABLE_GPU != "OFF" ]] 22 | then 23 | echo 'Running tests for GPU models...' 24 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" "$TEST_SCRIPT" 25 | echo 'Running tests for CPU models...' 26 | MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" "$TEST_SCRIPT" 27 | fi 28 | 29 | echo 'Running tests without visible GPUs...' 30 | CPU_ONLY=1 MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" "$TEST_SCRIPT" 31 | -------------------------------------------------------------------------------- /qa/generate_example_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | RETRAIN=${RETRAIN:-0} 19 | 20 | QA_DIR=$(cd $(dirname $0); pwd) 21 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" 22 | CPU_MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" 23 | 24 | SCRIPTS_DIR="${QA_DIR}/../scripts" 25 | GENERATOR_SCRIPT="python ${QA_DIR}/L0_e2e/generate_example_model.py" 26 | 27 | SKLEARN_CONVERTER="${SCRIPTS_DIR}/convert_sklearn.py" 28 | CUML_CONVERTER="${SCRIPTS_DIR}/convert_cuml.py" 29 | 30 | models=() 31 | 32 | name=xgboost 33 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 34 | then 35 | ${GENERATOR_SCRIPT} \ 36 | --name $name \ 37 | --depth 11 \ 38 | --trees 2000 \ 39 | --classes 3 \ 40 | --features 500 \ 41 | --storage_type SPARSE 42 | models+=( $name ) 43 | fi 44 | 45 | name=xgboost_json 46 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 47 | then 48 | ${GENERATOR_SCRIPT} \ 49 | --name $name \ 50 | --format xgboost_json \ 51 | --depth 7 \ 52 | --trees 500 \ 53 | --features 500 \ 54 | --predict_proba 55 | models+=( $name ) 56 | fi 57 | 58 | name=xgboost_ubj 59 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 60 | then 61 | ${GENERATOR_SCRIPT} \ 62 | --name $name \ 63 | --format xgboost_ubj \ 64 | --depth 7 \ 65 | --trees 500 \ 66 | --features 500 \ 67 | --predict_proba 68 | models+=( $name ) 69 | fi 70 | 71 | name=xgboost_shap 72 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 73 | then 74 | ${GENERATOR_SCRIPT} \ 75 | --name $name \ 76 | --depth 11 \ 77 | --trees 2000 \ 78 | --classes 3 \ 79 | --features 500 \ 80 | --storage_type SPARSE \ 81 | --max_batch_size 4096 82 | models+=( $name ) 83 | fi 84 | 85 | name=lightgbm 86 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 87 | then 88 | ${GENERATOR_SCRIPT} \ 89 | --name $name \ 90 | --format lightgbm \ 91 | --type lightgbm \ 92 | --depth 3 \ 93 | --trees 2000 \ 94 | --cat_features 3 \ 95 | --predict_proba \ 96 | --disable_experimental_optimizations 97 | models+=( $name ) 98 | fi 99 | 100 | name=lightgbm_rf 101 | if [ $RETRAIN -ne 0 ] || [ ! 
-d "${MODEL_REPO}/${name}" ] 102 | then 103 | ${GENERATOR_SCRIPT} \ 104 | --name $name \ 105 | --format lightgbm \ 106 | --type lightgbm_rf \ 107 | --depth 10 \ 108 | --trees 20 \ 109 | --classes 10 \ 110 | --predict_proba 111 | models+=( $name ) 112 | fi 113 | 114 | name=regression 115 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 116 | then 117 | ${GENERATOR_SCRIPT} \ 118 | --name $name \ 119 | --format lightgbm \ 120 | --type lightgbm \ 121 | --depth 25 \ 122 | --trees 10 \ 123 | --features 400 \ 124 | --task regression 125 | models+=( $name ) 126 | fi 127 | 128 | name=sklearn 129 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 130 | then 131 | ${GENERATOR_SCRIPT} \ 132 | --name $name \ 133 | --type sklearn \ 134 | --depth 3 \ 135 | --trees 10 \ 136 | --features 500 \ 137 | --predict_proba 138 | models+=( $name ) 139 | fi 140 | $SKLEARN_CONVERTER "${MODEL_REPO}/${name}/1/model.pkl" 2>/dev/null 141 | 142 | name=cuml 143 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 144 | then 145 | ${GENERATOR_SCRIPT} \ 146 | --name $name \ 147 | --type cuml \ 148 | --depth 3 \ 149 | --trees 10 \ 150 | --max_batch_size 32768 \ 151 | --features 500 \ 152 | --task regression 153 | models+=( $name ) 154 | fi 155 | $CUML_CONVERTER "${MODEL_REPO}/${name}/1/model.pkl" 2>/dev/null 156 | 157 | mkdir -p "${CPU_MODEL_REPO}" 158 | cp -r "${MODEL_REPO}"/* "${CPU_MODEL_REPO}"/ 159 | 160 | if [ ! -z $OWNER_ID ] && [ ! -z $OWNER_GID ] 161 | then 162 | chown -R "${OWNER_ID}:${OWNER_GID}" "${MODEL_REPO}" 163 | chown -R "${OWNER_ID}:${OWNER_GID}" "${CPU_MODEL_REPO}" 164 | fi 165 | 166 | find "${CPU_MODEL_REPO}" -name 'config.pbtxt' -exec \ 167 | sed -i s/KIND_GPU/KIND_CPU/g {} + 168 | -------------------------------------------------------------------------------- /qa/run-clang-format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific languapge governing permissions and 14 | # limitations under the License. 15 | # 16 | # Note: This file was taken directly from 17 | # https://github.com/rapidsai/cuml/blob/branch-21.06/cpp/scripts/run-clang-format.py 18 | # with minor modifications. 19 | 20 | from __future__ import print_function 21 | 22 | import argparse 23 | import os 24 | import re 25 | import subprocess 26 | import sys 27 | import tempfile 28 | 29 | EXPECTED_VERSION = "11.1.0" 30 | VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") 31 | # NOTE: populate this list with more top-level dirs as we add more of them to 32 | # to the cuml repo 33 | DEFAULT_DIRS = ["src", "src/triton_fil"] 34 | 35 | 36 | def parse_args(): 37 | argparser = argparse.ArgumentParser("Runs clang-format on a project") 38 | argparser.add_argument( 39 | "-dstdir", 40 | type=str, 41 | default=None, 42 | help="Directory to store the temporary outputs of" 43 | " clang-format. 
If nothing is passed for this, then" 44 | " a temporary dir will be created using `mkdtemp`", 45 | ) 46 | argparser.add_argument( 47 | "-exe", type=str, default="clang-format", help="Path to clang-format exe" 48 | ) 49 | argparser.add_argument( 50 | "-inplace", 51 | default=False, 52 | action="store_true", 53 | help="Replace the source files itself.", 54 | ) 55 | argparser.add_argument( 56 | "-regex", 57 | type=str, 58 | default=r"[.](cu|cuh|h|hpp|cpp)$", 59 | help="Regex string to filter in sources", 60 | ) 61 | argparser.add_argument( 62 | "-ignore", 63 | type=str, 64 | default=r"cannylab/bh[.]cu$", 65 | help="Regex used to ignore files from matched list", 66 | ) 67 | argparser.add_argument( 68 | "-v", dest="verbose", action="store_true", help="Print verbose messages" 69 | ) 70 | argparser.add_argument( 71 | "dirs", type=str, nargs="*", help="List of dirs where to find sources" 72 | ) 73 | args = argparser.parse_args() 74 | args.regex_compiled = re.compile(args.regex) 75 | args.ignore_compiled = re.compile(args.ignore) 76 | if args.dstdir is None: 77 | args.dstdir = tempfile.mkdtemp() 78 | ret = subprocess.check_output("%s --version" % args.exe, shell=True) 79 | ret = ret.decode("utf-8") 80 | version = VERSION_REGEX.match(ret) 81 | if version is None: 82 | raise Exception("Failed to figure out clang-format version!") 83 | version = version.group(1) 84 | if version != EXPECTED_VERSION: 85 | raise Exception( 86 | f"clang-format exe must be v{EXPECTED_VERSION} found '{version}'" 87 | ) 88 | if len(args.dirs) == 0: 89 | args.dirs = DEFAULT_DIRS 90 | return args 91 | 92 | 93 | def list_all_src_files(file_regex, ignore_regex, srcdirs, dstdir, inplace): 94 | allFiles = [] 95 | for srcdir in srcdirs: 96 | for root, dirs, files in os.walk(srcdir): 97 | for f in files: 98 | if re.search(file_regex, f): 99 | src = os.path.join(root, f) 100 | if re.search(ignore_regex, src): 101 | continue 102 | if inplace: 103 | _dir = root 104 | else: 105 | _dir = os.path.join(dstdir, root) 106 | dst = os.path.join(_dir, f) 107 | allFiles.append((src, dst)) 108 | return allFiles 109 | 110 | 111 | def run_clang_format(src, dst, exe, verbose): 112 | dstdir = os.path.dirname(dst) 113 | if not os.path.exists(dstdir): 114 | os.makedirs(dstdir) 115 | # run the clang format command itself 116 | if src == dst: 117 | cmd = "%s -i %s" % (exe, src) 118 | else: 119 | cmd = "%s %s > %s" % (exe, src, dst) 120 | try: 121 | subprocess.check_call(cmd, shell=True) 122 | except subprocess.CalledProcessError: 123 | print("Failed to run clang-format! Maybe your env is not proper?") 124 | raise 125 | # run the diff to check if there are any formatting issues 126 | cmd = "diff -q %s %s >/dev/null" % (src, dst) 127 | try: 128 | subprocess.check_call(cmd, shell=True) 129 | if verbose: 130 | print("%s passed" % os.path.basename(src)) 131 | except subprocess.CalledProcessError: 132 | print( 133 | "{} failed! 'diff {} {}' will show formatting violations!".format( 134 | os.path.basename(src), src, dst 135 | ) 136 | ) 137 | return False 138 | return True 139 | 140 | 141 | def main(): 142 | args = parse_args() 143 | # Attempt to making sure that we run this script from root of repo always 144 | if not os.path.exists(".git"): 145 | print("Error!! 
This needs to always be run from the root of repo") 146 | sys.exit(-1) 147 | all_files = list_all_src_files( 148 | args.regex_compiled, args.ignore_compiled, args.dirs, args.dstdir, args.inplace 149 | ) 150 | # actual format checker 151 | status = True 152 | for src, dst in all_files: 153 | if not run_clang_format(src, dst, args.exe, args.verbose): 154 | status = False 155 | if not status: 156 | print("clang-format failed! You have 2 options:") 157 | print(" 1. Look at formatting differences above and fix them manually") 158 | print(" 2. Or run the below command to bulk-fix all these at once") 159 | print("Bulk-fix command: ") 160 | print( 161 | " python qa/run-clang-format.py {} -inplace".format(" ".join(sys.argv[1:])) 162 | ) 163 | sys.exit(-1) 164 | return 165 | 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /qa/run_benchmarks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODELS=${MODELS:-'small_model small_model-cpu large_model large_model-cpu'} 3 | BATCHES=${BATCHES:-'1 16 128 1024'} 4 | MAX_LATENCY=${MAX_LATENCY:-5} 5 | 6 | repo_root="$(git rev-parse --show-toplevel)" || repo_root="$PWD" 7 | if [ -z $OUTPUT ] 8 | then 9 | OUTPUT="$repo_root/qa/benchmark_output" 10 | fi 11 | 12 | if [ -z $SHARED_MEM ] 13 | then 14 | SHARED_MEM="none" 15 | fi 16 | 17 | run_benchmark() { 18 | model="$1" 19 | batch="$2" 20 | output_dir="$OUTPUT/$model" 21 | if [ ! -d "$output_dir" ] 22 | then 23 | mkdir -p "$output_dir" 24 | fi 25 | 26 | output_file="$output_dir/$batch.csv" 27 | perf_analyzer \ 28 | -i GRPC \ 29 | --shared-memory $SHARED_MEM \ 30 | --percentile 99 \ 31 | --binary-search \ 32 | --concurrency-range 1:64:2 \ 33 | -l "$MAX_LATENCY" \ 34 | -m "$model" \ 35 | -b "$batch" \ 36 | -f "$output_file" 37 | } 38 | 39 | for model in $MODELS 40 | do 41 | for batch in $BATCHES 42 | do 43 | run_benchmark "$model" "$batch" 44 | done 45 | done 46 | 47 | python3 $repo_root/qa/collate_benchmarks.py $OUTPUT 48 | -------------------------------------------------------------------------------- /qa/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | QA_DIR=$(cd $(dirname $0); pwd) 19 | SERVER_ARGS="" 20 | UUID="$(cat /proc/sys/kernel/random/uuid)" 21 | CONTAINER_NAME="fil_backend-ci-$UUID" 22 | DOCKER_RUN=0 23 | DOCKER_ARGS="-d -p 8000:8000 -p 8001:8001 -p 8002:8002 --name ${CONTAINER_NAME}" 24 | TRITON_PID='' 25 | LOG_DIR="${QA_DIR}/logs" 26 | SERVER_LOG="${LOG_DIR}/${UUID}-server.log" 27 | TEST_PROFILE=${TEST_PROFILE:-ci} 28 | 29 | if [ ! 
-d "${LOG_DIR}" ] 30 | then 31 | mkdir -p "${LOG_DIR}" 32 | fi 33 | 34 | if [ -z $MODEL_REPO ] 35 | then 36 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" 37 | fi 38 | MODEL_REPO="$(readlink -f $MODEL_REPO)" 39 | 40 | DOCKER_ARGS="${DOCKER_ARGS} -v ${MODEL_REPO}:/models" 41 | 42 | if [ -z $CPU_ONLY ] || [ $CPU_ONLY -eq 0 ] 43 | then 44 | if [ -z $CUDA_VISIBLE_DEVICES ] 45 | then 46 | DOCKER_ARGS="${DOCKER_ARGS} --gpus all" 47 | TRITON_VISIBLE_DEVICES='all' 48 | else 49 | DOCKER_ARGS="${DOCKER_ARGS} --gpus ${CUDA_VISIBLE_DEVICES}" 50 | TRITON_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}" 51 | fi 52 | else 53 | TRITON_VISIBLE_DEVICES='' 54 | fi 55 | 56 | # If a Triton Docker image has been provided or no tritonserver executable is 57 | # available, run the server via Docker 58 | if [ ! -z $TRITON_IMAGE ] || ! command -v tritonserver 59 | then 60 | DOCKER_RUN=1 61 | TRITON_IMAGE=${TRITON_IMAGE:-rapids_triton_identity} 62 | SERVER_ARGS="${SERVER_ARGS} --model-repository=/models" 63 | else 64 | SERVER_ARGS="${SERVER_ARGS} --model-repository=${MODEL_REPO}" 65 | fi 66 | 67 | start_server() { 68 | if [ $DOCKER_RUN -eq 1 ] 69 | then 70 | docker run $DOCKER_ARGS $TRITON_IMAGE > /dev/null 71 | else 72 | if [ -z $TRITON_VISIBLE_DEVICES ] 73 | then 74 | CUDA_VISIBLE_DEVICES='' tritonserver $SERVER_ARGS > $SERVER_LOG 2>&1 & 75 | else 76 | tritonserver $SERVER_ARGS > $SERVER_LOG 2>&1 & 77 | fi 78 | TRITON_PID="$!" 79 | fi 80 | } 81 | 82 | [ ${START_SERVER:-1} -eq 1 ] && start_server || true 83 | 84 | # TODO (wphicks): Run linters 85 | 86 | finally() { 87 | if [ ${START_SERVER:-1} -eq 1 ] 88 | then 89 | if [ -z $TRITON_PID ] 90 | then 91 | docker logs $CONTAINER_NAME > $SERVER_LOG 2>&1 92 | docker rm -f $CONTAINER_NAME > /dev/null 2>&1 93 | else 94 | kill -15 $TRITON_PID 95 | wait 96 | fi 97 | fi 98 | } 99 | 100 | trap finally EXIT 101 | 102 | if [ ! -z $CPU_ONLY ] && [ $CPU_ONLY -eq 1 ] 103 | then 104 | pytest \ 105 | --repo "${MODEL_REPO}" \ 106 | --hypothesis-profile "$TEST_PROFILE" \ 107 | "$QA_DIR" 108 | else 109 | pytest --repo "${MODEL_REPO}" "$QA_DIR" --hypothesis-profile "$TEST_PROFILE" 110 | fi 111 | -------------------------------------------------------------------------------- /scripts/convert_cuml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """cuML RF to Treelite checkpoint converter 17 | 18 | Given a path to a pickle file containing a cuML random forest model, this 19 | script will generate a Treelite checkpoint file representation of the model in 20 | the same directory. 
21 | """ 22 | 23 | import argparse 24 | import pathlib 25 | import pickle 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("pickle_file", help="Path to the pickle file to convert") 30 | args = parser.parse_args() 31 | 32 | with open(args.pickle_file, "rb") as f: 33 | model = pickle.load(f) 34 | 35 | model_dir = pathlib.Path(args.pickle_file).resolve().parent 36 | out_path = model_dir / "checkpoint.tl" 37 | 38 | model.convert_to_treelite_model().to_treelite_checkpoint(str(out_path)) 39 | -------------------------------------------------------------------------------- /scripts/convert_sklearn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """sklearn RF/GBDT to Treelite checkpoint converter 18 | 19 | Given a path to a pickle file containing a scikit-learn random forest (or 20 | gradient boosting) model, this script will generate a Treelite checkpoint file 21 | representation of the model in the same directory. 22 | """ 23 | 24 | import argparse 25 | import pathlib 26 | import pickle 27 | 28 | import treelite 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument("pickle_file", help="Path to the pickle file to convert") 33 | args = parser.parse_args() 34 | 35 | with open(args.pickle_file, "rb") as f: 36 | model = pickle.load(f) 37 | 38 | model_dir = pathlib.Path(args.pickle_file).resolve().parent 39 | out_path = model_dir / "checkpoint.tl" 40 | 41 | tl_model = treelite.sklearn.import_model(model) 42 | tl_model.serialize(out_path) 43 | -------------------------------------------------------------------------------- /scripts/environment.yml: -------------------------------------------------------------------------------- 1 | name: triton_scripts 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - cuda-version=12.8 8 | - cuml=25.04 9 | - python 10 | - scikit-learn>=1.5 11 | - treelite>=4.4 12 | -------------------------------------------------------------------------------- /src/api.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | namespace triton { namespace backend { namespace NAMESPACE { 35 | 36 | using ModelState = rapids::TritonModelState; 37 | using ModelInstanceState = 38 | rapids::ModelInstanceState; 39 | 40 | extern "C" { 41 | 42 | /** Confirm that backend is compatible with Triton's backend API version 43 | */ 44 | TRITONSERVER_Error* 45 | TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) 46 | { 47 | return rapids::triton_api::initialize(backend); 48 | } 49 | 50 | TRITONSERVER_Error* 51 | TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) 52 | { 53 | return rapids::triton_api::model_initialize(model); 54 | } 55 | 56 | TRITONSERVER_Error* 57 | TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) 58 | { 59 | return rapids::triton_api::model_finalize(model); 60 | } 61 | 62 | TRITONSERVER_Error* 63 | TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) 64 | { 65 | return rapids::triton_api::instance_initialize< 66 | ModelState, ModelInstanceState>(instance); 67 | } 68 | 69 | TRITONSERVER_Error* 70 | TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) 71 | { 72 | return rapids::triton_api::instance_finalize(instance); 73 | } 74 | 75 | TRITONSERVER_Error* 76 | TRITONBACKEND_ModelInstanceExecute( 77 | TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** raw_requests, 78 | uint32_t const request_count) 79 | { 80 | return rapids::triton_api::execute( 81 | instance, raw_requests, static_cast(request_count)); 82 | } 83 | 84 | } // extern "C" 85 | 86 | }}} // namespace triton::backend::NAMESPACE 87 | -------------------------------------------------------------------------------- /src/cpu_forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace triton { namespace backend { namespace NAMESPACE { 29 | 30 | template <> 31 | struct ForestModel { 32 | ForestModel() = default; 33 | ForestModel(std::shared_ptr tl_model) : tl_model_{tl_model} {} 34 | 35 | void predict( 36 | rapids::Buffer& output, rapids::Buffer const& input, 37 | std::size_t samples, bool predict_proba) const 38 | { 39 | tl_model_->predict(output, input, samples, predict_proba); 40 | } 41 | 42 | 43 | private: 44 | std::shared_ptr tl_model_; 45 | }; 46 | 47 | }}} // namespace triton::backend::NAMESPACE 48 | -------------------------------------------------------------------------------- /src/fil_config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | namespace triton { namespace backend { namespace NAMESPACE { 26 | 27 | namespace detail { 28 | 29 | inline auto 30 | name_to_tl_algo(std::string const& name) 31 | { 32 | auto result = ML::fil::algo_t{}; 33 | if (name == "ALGO_AUTO") { 34 | result = ML::fil::algo_t::ALGO_AUTO; 35 | } else if (name == "NAIVE") { 36 | result = ML::fil::algo_t::NAIVE; 37 | } else if (name == "TREE_REORG") { 38 | result = ML::fil::algo_t::TREE_REORG; 39 | } else if (name == "BATCH_TREE_REORG") { 40 | result = ML::fil::algo_t::BATCH_TREE_REORG; 41 | } else { 42 | auto log_stream = std::stringstream{}; 43 | log_stream << "Unknown FIL algorithm name: " << name; 44 | throw rapids::TritonException(rapids::Error::InvalidArg, log_stream.str()); 45 | } 46 | 47 | return result; 48 | } 49 | 50 | inline auto 51 | name_to_storage_type(std::string const& name) 52 | { 53 | auto result = ML::fil::storage_type_t{}; 54 | if (name == "AUTO") { 55 | result = ML::fil::storage_type_t::AUTO; 56 | } else if (name == "DENSE") { 57 | result = ML::fil::storage_type_t::DENSE; 58 | } else if (name == "SPARSE") { 59 | result = ML::fil::storage_type_t::SPARSE; 60 | } else if (name == "SPARSE8") { 61 | result = ML::fil::storage_type_t::SPARSE8; 62 | } else { 63 | auto log_stream = std::stringstream{}; 64 | log_stream << "Unknown FIL storage type name: " << name; 65 | throw rapids::TritonException(rapids::Error::InvalidArg, log_stream.str()); 66 | } 67 | 68 | return result; 69 | } 70 | 71 | } // namespace detail 72 | 73 | inline auto 74 | tl_to_fil_config(treelite_config const& tl_config) 75 | { 76 | return ML::fil::treelite_params_t{ 77 | detail::name_to_tl_algo(tl_config.algo), 78 | tl_config.output_class, 79 | tl_config.threshold, 80 | detail::name_to_storage_type(tl_config.storage_type), 81 | tl_config.blocks_per_sm, 82 | tl_config.threads_per_tree, 83 | 0, 84 | nullptr, 85 | ML::fil::precision_t::PRECISION_FLOAT32}; 86 | } 87 | 88 | 
}}} // namespace triton::backend::NAMESPACE 89 | -------------------------------------------------------------------------------- /src/forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #ifdef TRITON_ENABLE_GPU 20 | #include 21 | #endif 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace triton { namespace backend { namespace NAMESPACE { 33 | 34 | /* This struct defines a unified prediction interface to both FIL and GTIL. 35 | * Template specializations are provided based on the type of memory the model 36 | * is expected to process */ 37 | template 38 | struct ForestModel { 39 | using device_id_t = int; 40 | 41 | ForestModel(std::shared_ptr tl_model) 42 | { 43 | throw rapids::TritonException( 44 | rapids::Error::Unsupported, 45 | "ForestModel invoked with a memory type unsupported by this build"); 46 | } 47 | 48 | ForestModel( 49 | device_id_t device_id, cudaStream_t stream, 50 | std::shared_ptr tl_model) 51 | { 52 | throw rapids::TritonException( 53 | rapids::Error::Unsupported, 54 | "ForestModel invoked with a memory type unsupported by this build"); 55 | } 56 | 57 | void predict( 58 | rapids::Buffer& output, rapids::Buffer const& input, 59 | std::size_t samples, bool predict_proba) const 60 | { 61 | throw rapids::TritonException( 62 | rapids::Error::Unsupported, 63 | "ForestModel invoked with a memory type unsupported by this build"); 64 | } 65 | }; 66 | }}} // namespace triton::backend::NAMESPACE 67 | -------------------------------------------------------------------------------- /src/gpu_forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace triton { namespace backend { namespace NAMESPACE { 33 | 34 | using fil_forest_t = ML::fil::forest_t; 35 | 36 | template <> 37 | struct ForestModel { 38 | using device_id_t = int; 39 | ForestModel( 40 | device_id_t device_id, cudaStream_t stream, 41 | std::shared_ptr tl_model) 42 | : device_id_{device_id}, raft_handle_{stream}, tl_model_{tl_model}, 43 | fil_forest_{[this]() { 44 | auto result = fil_forest_t{}; 45 | auto variant_result = ML::fil::forest_variant{}; 46 | auto config = tl_to_fil_config(tl_model_->config()); 47 | ML::fil::from_treelite( 48 | raft_handle_, &variant_result, tl_model_->handle(), &config); 49 | try { 50 | result = std::get(variant_result); 51 | } 52 | catch (std::bad_variant_access const& err) { 53 | throw rapids::TritonException( 54 | rapids::Error::Internal, 55 | "Model did not load with expected precision"); 56 | } 57 | return result; 58 | }()} 59 | { 60 | } 61 | 62 | ForestModel(ForestModel const& other) = default; 63 | ForestModel& operator=(ForestModel const& other) = default; 64 | ForestModel(ForestModel&& other) = default; 65 | ForestModel& operator=(ForestModel&& other) = default; 66 | 67 | ~ForestModel() noexcept { ML::fil::free(raft_handle_, fil_forest_); } 68 | 69 | void predict( 70 | rapids::Buffer& output, rapids::Buffer const& input, 71 | std::size_t samples, bool predict_proba) const 72 | { 73 | ML::fil::predict( 74 | raft_handle_, fil_forest_, output.data(), input.data(), samples, 75 | predict_proba); 76 | } 77 | 78 | private: 79 | raft::handle_t raft_handle_; 80 | std::shared_ptr tl_model_; 81 | fil_forest_t fil_forest_; 82 | device_id_t device_id_; 83 | }; 84 | 85 | }}} // namespace triton::backend::NAMESPACE 86 | -------------------------------------------------------------------------------- /src/gpu_treeshap_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | namespace triton { namespace backend { namespace NAMESPACE { 31 | 32 | template <> 33 | struct TreeShapModel { 34 | using device_id_t = int; 35 | TreeShapModel( 36 | device_id_t device_id, cudaStream_t stream, 37 | std::shared_ptr tl_model) 38 | : device_id_{device_id}, raft_handle_{stream}, tl_model_{tl_model}, 39 | path_info_{ML::Explainer::extract_path_info(tl_model_->handle())} 40 | { 41 | } 42 | 43 | TreeShapModel(TreeShapModel const& other) = default; 44 | TreeShapModel& operator=(TreeShapModel const& other) = default; 45 | TreeShapModel(TreeShapModel&& other) = default; 46 | TreeShapModel& operator=(TreeShapModel&& other) = default; 47 | 48 | void predict( 49 | rapids::Buffer& output, rapids::Buffer const& input, 50 | std::size_t n_rows, std::size_t n_cols) const 51 | { 52 | // Need to synchronize on the stream because treeshap currently does not 53 | // take a stream on its API 54 | input.stream_synchronize(); 55 | ML::Explainer::gpu_treeshap( 56 | path_info_, 57 | ML::Explainer::FloatPointer(const_cast(input.data())), n_rows, 58 | n_cols, ML::Explainer::FloatPointer(output.data()), output.size()); 59 | output.stream_synchronize(); 60 | } 61 | 62 | private: 63 | raft::handle_t raft_handle_; 64 | std::shared_ptr tl_model_; 65 | device_id_t device_id_; 66 | ML::Explainer::TreePathHandle path_info_; 67 | }; 68 | 69 | }}} // namespace triton::backend::NAMESPACE 70 | -------------------------------------------------------------------------------- /src/herring/node.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include 20 | 21 | namespace herring { 22 | /* Summary of Types 23 | * ---------------- 24 | * value_t (float or double): The value used for testing a node condition or 25 | * for providing the output of leaves. 26 | * feature_index_t (std::uint16_t or std::uint32_t): Index indicating which 27 | * feature this conditional applies to 28 | * offset_t (std::uint16_t or std::uint32_t): Offset between this node and 29 | * its distant child. For small trees, using a smaller type can reduce the 30 | * padded size of the node to as few as 8 bytes. 31 | * output_index_t (typically std::uint32_t): If leaf output values cannot be 32 | * stored in the same memory as test condition values, this index provides a 33 | * lookup location for output values stored in the tree. 
34 | */ 35 | template < 36 | typename value_t, typename feature_index_t, typename offset_t, 37 | typename output_index_t> 38 | struct simple_node { 39 | using value_type = value_t; // float or double 40 | using index_type = feature_index_t; 41 | using offset_type = offset_t; 42 | using output_index_type = output_index_t; 43 | using category_set_type = 44 | std::bitset; 45 | // Cannot use std::variant here because it takes up 4 additional bytes when 46 | // value_type is float 47 | union value_or_index { 48 | value_type value; 49 | output_index_type index; 50 | category_set_type categories; 51 | }; 52 | value_or_index value; // 4 bytes for float 53 | offset_type 54 | distant_offset; // 2 bytes for depth < 16 or small trees; 4 otherwise 55 | index_type feature; // 1-4 bytes, depending on number of features 56 | 57 | simple_node() : value{value_type{}}, distant_offset{}, feature{} {} 58 | }; 59 | 60 | template < 61 | bool categorical, bool inclusive_threshold, typename value_t, 62 | typename feature_index_t, typename offset_t, typename output_index_t> 63 | auto 64 | evaluate_node( 65 | simple_node const& node, 66 | float feature_value) 67 | { 68 | auto condition = false; 69 | if constexpr (categorical) { 70 | if (feature_value >= 0 && feature_value < node.value.categories.size()) { 71 | // NOTE: This cast aligns with the convention used in LightGBM and 72 | // other frameworks to cast floats when converting to integral 73 | // categories. This can have surprising effects with floating point 74 | // arithmetic, but it is kept this way for now in order to provide 75 | // consistency with results obtained from the training frameworks. 76 | condition = 77 | node.value.categories[static_cast(feature_value)]; 78 | } 79 | } else { 80 | if constexpr (inclusive_threshold) { 81 | condition = (feature_value <= node.value.value); 82 | } else { 83 | condition = (feature_value < node.value.value); 84 | } 85 | } 86 | 87 | // This narrowing conversion is guaranteed safe because distant_offset 88 | // cannot be 0 89 | // TODO(wphicks): Guarantee this with custom types 90 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 91 | #pragma GCC diagnostic push 92 | #pragma GCC diagnostic ignored "-Wnarrowing" 93 | return offset_t{1 + condition * (node.distant_offset - 1)}; 94 | #pragma GCC diagnostic pop 95 | } 96 | 97 | template < 98 | bool categorical, bool inclusive_threshold, typename value_t, 99 | typename feature_index_t, typename offset_t, typename output_index_t> 100 | auto 101 | evaluate_node( 102 | simple_node const& node, 103 | float const* row) 104 | { 105 | auto feature_value = *(row + node.feature); 106 | return evaluate_node(node, feature_value); 107 | } 108 | } // namespace herring 109 | -------------------------------------------------------------------------------- /src/herring/omp_helpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | 20 | template 21 | struct thread_count { 22 | thread_count() : value{omp_get_max_threads()} {} 23 | thread_count(T t) 24 | : value{[](T t) { 25 | auto result = T{t}; 26 | auto max_count = omp_get_max_threads(); 27 | if (t < 1 || t > max_count) { 28 | result = max_count; 29 | } 30 | return result; 31 | }(t)} 32 | { 33 | } 34 | operator int() const { return static_cast(value); } 35 | 36 | private: 37 | T value; 38 | }; 39 | -------------------------------------------------------------------------------- /src/herring/output_ops.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace herring { 20 | 21 | /* Enum representing possible element-wise operations on output */ 22 | enum class element_op { 23 | disable, 24 | signed_square, 25 | hinge, 26 | sigmoid, 27 | exponential, 28 | exponential_standard_ratio, 29 | logarithm_one_plus_exp 30 | }; 31 | 32 | /* Enum representing possible row-wise operations on output */ 33 | enum class row_op { disable, softmax, max_index }; 34 | } // namespace herring 35 | -------------------------------------------------------------------------------- /src/herring/tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "herring/type_helpers.hpp" 25 | 26 | namespace herring { 27 | /* A tree that can just return the stored value of nodes as its output */ 28 | template < 29 | typename value_t, typename feature_index_t, typename offset_t, 30 | typename output_index_t, typename output_t> 31 | struct simple_tree { 32 | using node_type = 33 | simple_node; 34 | using output_type = output_t; 35 | std::vector nodes; 36 | std::vector default_distant; 37 | std::vector categorical_node; 38 | bool has_categorical_nodes; 39 | 40 | auto get_leaf_value(node_type const& node) const 41 | { 42 | if constexpr (std::is_same_v) { 43 | return node.value.value; 44 | } else { 45 | static_assert(std::is_same_v); 46 | return node.value.index; 47 | } 48 | } 49 | 50 | auto get_leaf_value(std::size_t node_index) const 51 | { 52 | return get_leaf_value(nodes[node_index]); 53 | } 54 | 55 | template < 56 | bool missing_values_in_row, bool categorical_model, 57 | bool inclusive_threshold> 58 | auto evaluate_tree_node(std::size_t node_index, float const* row) const 59 | { 60 | auto result = offset_t{}; 61 | if constexpr (categorical_model) { 62 | if (!has_categorical_nodes) { 63 | result = evaluate_tree_node_< 64 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 65 | } else { 66 | result = evaluate_tree_node_< 67 | missing_values_in_row, true, inclusive_threshold>(node_index, row); 68 | } 69 | } else { 70 | result = evaluate_tree_node_< 71 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 72 | } 73 | return result; 74 | }; 75 | 76 | private: 77 | template < 78 | bool missing_values_in_row, bool categorical_tree, 79 | bool inclusive_threshold> 80 | auto evaluate_tree_node_(std::size_t node_index, float const* row) const 81 | { 82 | auto const& node = nodes[node_index]; 83 | auto result = offset_t{}; 84 | if constexpr (missing_values_in_row) { 85 | auto feature_value = *(row + node.feature); 86 | auto present = !std::isnan(feature_value); 87 | if (present) { 88 | if constexpr (categorical_tree) { 89 | if (!categorical_node[node_index]) { 90 | result = 91 | evaluate_node(node, feature_value); 92 | } else { 93 | result = 94 | evaluate_node(node, feature_value); 95 | } 96 | } else { 97 | result = 98 | evaluate_node(node, feature_value); 99 | } 100 | } else { 101 | // This narrowing conversion is guaranteed safe because distant_offset 102 | // cannot be 0 103 | // TODO(wphicks): Guarantee this with custom types 104 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 105 | #pragma GCC diagnostic push 106 | #pragma GCC diagnostic ignored "-Wnarrowing" 107 | result = 1 + (node.distant_offset - 1) * default_distant[node_index]; 108 | #pragma GCC diagnostic pop 109 | } 110 | } else { 111 | if constexpr (categorical_tree) { 112 | if (!categorical_node[node_index]) { 113 | result = evaluate_node(node, row); 114 | } else { 115 | result = evaluate_node(node, row); 116 | } 117 | } else { 118 | result = evaluate_node(node, row); 119 | } 120 | } 121 | return result; 122 | } 123 | }; 124 | 125 | 126 | /* A tree that must look up its output values in separate storage */ 127 | template < 128 | typename value_t, typename feature_index_t, typename offset_t, 129 | typename output_index_t, typename output_t> 130 | struct lookup_tree { 131 | using node_type = 132 | simple_node; 133 | using output_type = output_t; 134 | std::vector nodes; 135 | std::vector leaf_outputs; 136 | std::vector 
default_distant; 137 | std::vector categorical_node; 138 | bool has_categorical_nodes; 139 | 140 | template < 141 | typename tree_output_type = output_t, 142 | std::enable_if_t< 143 | is_container_specialization::value, 144 | bool> = true> 145 | auto const& get_leaf_value(node_type const& node) const 146 | { 147 | return leaf_outputs[node.value.index]; 148 | } 149 | 150 | template < 151 | typename tree_output_type = output_t, 152 | std::enable_if_t< 153 | !is_container_specialization::value, 154 | bool> = true> 155 | auto get_leaf_value(node_type const& node) const 156 | { 157 | return leaf_outputs[node.value.index]; 158 | } 159 | 160 | auto get_leaf_value(std::size_t node_id) const 161 | { 162 | return leaf_outputs[nodes[node_id].value.index]; 163 | } 164 | 165 | template < 166 | bool missing_values_in_row, bool categorical_model, 167 | bool inclusive_threshold> 168 | auto evaluate_tree_node(std::size_t node_index, float const* row) const 169 | { 170 | auto result = offset_t{}; 171 | if constexpr (categorical_model) { 172 | if (!has_categorical_nodes) { 173 | result = evaluate_tree_node_< 174 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 175 | } else { 176 | result = evaluate_tree_node_< 177 | missing_values_in_row, true, inclusive_threshold>(node_index, row); 178 | } 179 | } else { 180 | result = evaluate_tree_node_< 181 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 182 | } 183 | return result; 184 | }; 185 | 186 | private: 187 | template < 188 | bool missing_values_in_row, bool categorical_tree, 189 | bool inclusive_threshold> 190 | auto evaluate_tree_node_(std::size_t node_index, float const* row) const 191 | { 192 | auto const& node = nodes[node_index]; 193 | auto result = offset_t{}; 194 | if constexpr (missing_values_in_row) { 195 | auto feature_value = *(row + node.feature); 196 | auto present = !std::isnan(feature_value); 197 | if (present) { 198 | if constexpr (categorical_tree) { 199 | if (!categorical_node[node_index]) { 200 | result = 201 | evaluate_node(node, feature_value); 202 | } else { 203 | result = 204 | evaluate_node(node, feature_value); 205 | } 206 | } else { 207 | result = 208 | evaluate_node(node, feature_value); 209 | } 210 | } else { 211 | // This narrowing conversion is guaranteed safe because distant_offset 212 | // cannot be 0 213 | // TODO(wphicks): Guarantee this with custom types 214 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 215 | #pragma GCC diagnostic push 216 | #pragma GCC diagnostic ignored "-Wnarrowing" 217 | result = 1 + (node.distant_offset - 1) * default_distant[node_index]; 218 | #pragma GCC diagnostic pop 219 | } 220 | } else { 221 | if constexpr (categorical_tree) { 222 | if (!categorical_node[node_index]) { 223 | result = evaluate_node(node, row); 224 | } else { 225 | result = evaluate_node(node, row); 226 | } 227 | } else { 228 | result = evaluate_node(node, row); 229 | } 230 | } 231 | return result; 232 | } 233 | }; 234 | } // namespace herring 235 | -------------------------------------------------------------------------------- /src/herring/type_helpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace herring { 22 | template class U> 23 | struct is_container_specialization : std::false_type { 24 | using value_type = T; 25 | }; 26 | 27 | template