├── .clang-format ├── .dockerignore ├── .gitattributes ├── .github └── workflows │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Example_Models.md ├── LICENSE ├── README.md ├── SKLearn_and_cuML.md ├── build.sh ├── build_conda_env_container.sh ├── ci ├── gitlab │ └── build.sh └── local │ └── build.sh ├── cmake ├── modules │ └── ConfigureCUDA.cmake └── thirdparty │ ├── get_cuml.cmake │ ├── get_gtest.cmake │ ├── get_rapids-triton.cmake │ └── get_treelite.cmake ├── conda └── environments │ ├── buildpy.yml │ ├── rapids_triton_dev.yml │ ├── triton_benchmark.yml │ ├── triton_test.yml │ └── triton_test_no_client.yml ├── docs ├── build.md ├── explainability.md ├── install.md ├── model_config.md ├── model_support.md ├── repo_overview.md ├── sklearn_and_cuml.md ├── tests.md └── workflow.md ├── notebooks ├── README.md ├── categorical-fraud-detection │ ├── Fraud_Detection_Example.ipynb │ ├── README.md │ └── environment.yml ├── faq │ ├── FAQs.ipynb │ ├── README.md │ └── environment.yml └── simple-xgboost │ ├── README.md │ └── simple_xgboost_example.ipynb ├── ops ├── Dockerfile ├── E2E.md ├── gpuci_conda_retry ├── gpuci_mamba_retry └── move_deps.py ├── pyproject.toml ├── qa ├── BENCHMARKS.md ├── L0_e2e │ ├── conftest.py │ ├── generate_example_model.py │ └── test_model.py ├── benchmark_repo │ ├── large_model-cpu │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ ├── large_model │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ ├── small_model-cpu │ │ ├── 1 │ │ │ └── xgboost.json │ │ └── config.pbtxt │ └── small_model │ │ ├── 1 │ │ └── xgboost.json │ │ └── config.pbtxt ├── collate_benchmarks.py ├── entrypoint.sh ├── generate_example_models.sh ├── run-clang-format.py ├── run_benchmarks.sh └── run_tests.sh ├── scripts ├── convert_cuml.py ├── convert_sklearn.py └── environment.yml └── src ├── api.cc ├── cpu_forest_model.h ├── cpu_treeshap_model.h ├── fil_config.h ├── forest_model.h ├── gpu_forest_model.h ├── gpu_treeshap_model.h ├── herring ├── model.hpp ├── node.hpp ├── omp_helpers.hpp ├── output_ops.hpp ├── tl_helpers.hpp ├── tree.hpp └── type_helpers.hpp ├── linear_treeshap_constants.h ├── model.h ├── names.h ├── serialization.h ├── shared_state.h ├── tl_config.h ├── tl_model.h ├── tl_utils.h └── treeshap_model.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | 4 | IndentWidth: 2 5 | ContinuationIndentWidth: 4 6 | UseTab: Never 7 | MaxEmptyLinesToKeep: 2 8 | 9 | SortIncludes: true 10 | CompactNamespaces: true 11 | ReflowComments: true 12 | 13 | DerivePointerAlignment: false 14 | PointerAlignment: Left 15 | 16 | AllowShortIfStatementsOnASingleLine: false 17 | AllowShortBlocksOnASingleLine: false 18 | AllowShortFunctionsOnASingleLine: Inline 19 | 20 | AlwaysBreakAfterReturnType: TopLevelDefinitions 21 | AlignAfterOpenBracket: AlwaysBreak 22 | BreakBeforeBraces: Custom 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: true 28 | AfterNamespace: false 29 | AfterStruct: false 30 | AfterUnion: false 31 | BeforeCatch: true 32 | 33 | BinPackArguments: true 34 | BinPackParameters: true 35 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 36 | 37 | IndentCaseLabels: true -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | build/ 4 | 
ops/*Dockerfile* 5 | qa/L0_e2e/model_repository 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fil_backend/c2fa763d472712737815646ba508d9fa3663ba4a/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | name: pre-commit 28 | 29 | on: 30 | pull_request: 31 | push: 32 | branches: [main] 33 | 34 | jobs: 35 | pre-commit: 36 | runs-on: ubuntu-22.04 37 | steps: 38 | - uses: actions/checkout@v3 39 | - uses: actions/setup-python@v3 40 | - uses: pre-commit/action@v3.0.0 41 | 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | install/ 3 | *.so 4 | ops/stage 5 | qa/L0_e2e/model_repository 6 | qa/L0_e2e/cpu_model_repository 7 | qa/logs 8 | qa/benchmark_output 9 | *.ipynb_checkpoints* 10 | notebooks/categorical-fraud-detection/ieee-fraud-detection.zip 11 | notebooks/categorical-fraud-detection/sample_submission.csv 12 | notebooks/categorical-fraud-detection/test_identity.csv 13 | notebooks/categorical-fraud-detection/test_transaction.csv 14 | notebooks/categorical-fraud-detection/train_identity.csv 15 | notebooks/categorical-fraud-detection/train_transaction.csv 16 | notebooks/categorical-fraud-detection/model_repository 17 | notebooks/faq/data/ 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | repos: 28 | - repo: https://github.com/timothycrosley/isort 29 | rev: 5.12.0 30 | hooks: 31 | - id: isort 32 | additional_dependencies: [toml] 33 | - repo: https://github.com/psf/black 34 | rev: 23.1.0 35 | hooks: 36 | - id: black 37 | types_or: [python, cython] 38 | - repo: https://github.com/PyCQA/flake8 39 | rev: 5.0.4 40 | hooks: 41 | - id: flake8 42 | args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501] 43 | types_or: [python, cython] 44 | - repo: https://github.com/pre-commit/mirrors-clang-format 45 | rev: v16.0.5 46 | hooks: 47 | - id: clang-format 48 | types_or: [c, c++, cuda, proto, textproto, java] 49 | args: ["-fallback-style=none", "-style=file", "-i"] 50 | - repo: https://github.com/codespell-project/codespell 51 | rev: v2.2.4 52 | hooks: 53 | - id: codespell 54 | additional_dependencies: [tomli] 55 | args: ["--toml", "pyproject.toml"] 56 | exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) 57 | # More details about these pre-commit hooks here: 58 | # https://pre-commit.com/hooks.html 59 | - repo: https://github.com/pre-commit/pre-commit-hooks 60 | rev: v4.4.0 61 | hooks: 62 | - id: check-case-conflict 63 | - id: check-executables-have-shebangs 64 | - id: check-merge-conflict 65 | - id: check-json 66 | - id: check-toml 67 | - id: check-yaml 68 | - id: check-shebang-scripts-are-executable 69 | - id: end-of-file-fixer 70 | types_or: [c, c++, cuda, proto, textproto, java, python] 71 | - id: mixed-line-ending 72 | - id: requirements-txt-fixer 73 | - id: trailing-whitespace 74 | 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Triton FIL backend 2 | 3 | ## How to Contribute 4 | You can help improve the Triton FIL backend in any of the following ways: 5 | - Submitting a bug report, feature request or documentation issue 6 | 
- Proposing and implementing a new feature 7 | - Implementing a feature or bug-fix for an outstanding issue 8 | 9 | ### Bug reports 10 | When submitting a bug report, please include a *minimum* *reproducible* 11 | example. Ideally, this should be a snippet of code that other developers can 12 | copy, paste, and immediately run to try to reproduce the error. Please: 13 | - Do include import statements and any other code necessary to immediately run 14 | your example 15 | - Avoid examples that require other developers to download models or data 16 | unless you cannot reproduce the problem with synthetically-generated data 17 | 18 | ### Code Contributions 19 | To contribute code to this project, please follow these steps: 20 | 1. Find an issue to work on or submit an issue documenting the problem you 21 | would like to work on. 22 | 2. Comment on the issue saying that you plan to work on it. 23 | 3. Review the implementation details section below for information to help you 24 | make your changes in a way that is consistent with the rest of the codebase. 25 | 4. Code! 26 | 5. Create your pull request. 27 | 6. Wait for other developers to review your code and update your PR as needed. 28 | 7. Once a PR is approved, it will be merged into the main branch. 29 | 30 | #### Signing Your Work 31 | * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. 32 | * Any contribution which contains commits that are not Signed-Off will not be accepted. 33 | * To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes: 34 | ```bash 35 | $ git commit -s -m "Add cool feature." 36 | ``` 37 | This will append the following to your commit message: 38 | ``` 39 | Signed-off-by: Your Name 40 | ``` 41 | * Full text of the DCO: 42 | ``` 43 | Developer Certificate of Origin 44 | Version 1.1 45 | 46 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 47 | 1 Letterman Drive 48 | Suite D4700 49 | San Francisco, CA, 94129 50 | 51 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 52 | ``` 53 | ``` 54 | Developer's Certificate of Origin 1.1 55 | 56 | By making a contribution to this project, I certify that: 57 | 58 | (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 59 | 60 | (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 61 | 62 | (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 63 | 64 | (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 
65 | ``` 66 | 67 | ## Developer Workflow Documentation 68 | Additional information useful to contributors is available in the 69 | following sections: 70 | 71 | - [Development workflow](docs/workflow.md) 72 | - [Overview of the repo](docs/repo_overview.md) 73 | - [Build instructions](docs/build.md) 74 | - [Running tests](docs/tests.md) 75 | 76 | ## Style 77 | Contributions to the FIL backend should: 78 | - Adhere to [Almost-Always-Auto](https://herbsutter.com/2013/08/12/gotw-94-solution-aaa-style-almost-always-auto/) style 79 | - Prefer STL algorithms to [raw loops](https://belaycpp.com/2021/06/22/dont-use-raw-loops/) wherever possible 80 | - Use C++ types except where explicitly interfacing with C code (e.g. 81 | `std::size_t` as opposed to `size_t`) 82 | - Avoid depending on transitive includes 83 | -------------------------------------------------------------------------------- /Example_Models.md: -------------------------------------------------------------------------------- 1 | # Generating Example Models 2 | 3 | The FIL backend's testing infrastructure includes [a 4 | script](https://github.com/triton-inference-server/fil_backend/blob/main/qa/L0_e2e/generate_example_model.py) 5 | for generating example models, putting them in the correct directory layout, 6 | and generating an associated config file. This can be helpful both for 7 | providing a template for your own models and for testing your Triton 8 | deployment. 9 | 10 | ## Prerequisites 11 | To use the model generation script, you will need to install 12 | [cuML](https://rapids.ai/start.html#rapids-release-selector) and whatever 13 | forest model framework you wish to use 14 | ([LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html), 15 | [XGBoost](https://xgboost.readthedocs.io/en/latest/install.html), or 16 | [Scikit-Learn](https://scikit-learn.org/stable/install.html)). For convenience, 17 | a Conda environment [config 18 | file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/triton_test.yml) 19 | is included in the FIL backend repo which can be used to install all of these 20 | frameworks: 21 | 22 | ```bash 23 | git clone https://github.com/triton-inference-server/fil_backend.git 24 | cd fil_backend 25 | conda env create -f conda/environments/triton_test.yml 26 | conda activate triton_test 27 | ``` 28 | 29 | ## Usage 30 | 31 | The simplest possible invocation of the example generation script is just: 32 | 33 | ```bash 34 | python qa/L0_e2e/generate_example_model.py 35 | ``` 36 | 37 | This will create an example XGBoost model, serialize it to XGBoost's binary 38 | format and store it (with full configuration) within the 39 | `qa/L0_e2e/model_repository` directory. 40 | 41 | ### Arguments 42 | You can provide additional arguments to the model generation script to control 43 | all details of the generated model. Available arguments are described in the 44 | following sections. 45 | 46 | #### Model framework 47 | - `--type`: Takes one of `lightgbm`, `xgboost`, `sklearn` or `cuml` as argument 48 | and determines what framework will be used to train the model. Defaults to 49 | `xgboost`. 50 | - `--format`: Determines what format to serialize the model to for frameworks 51 | which support multiple serialization formats. One of `xgboost`, 52 | `xgboost_json`, `lightgbm`, or `pickle`. If omitted, this will default to a 53 | valid choice for the chosen framework. 54 | 55 | #### Model metadata 56 | - `--name`: An arbitrary string used to identify the generated model. 
If 57 | omitted, a string will be generated from the model type, serialization 58 | format, and task. 59 | - `--repo`: Path to the directory where you wish to set up your model 60 | repository. This argument is required if this script is invoked outside of 61 | the FIL backend Git repository. If omitted, it will default to 62 | `qa/L0_e2e/model_repository` from the Git repository root. 63 | 64 | #### Model details 65 | - `--task`: One of `classification` or `regression` indicating the type of 66 | inference task for this model. 67 | - `--depth`: The maximum depth for trees in this model. 68 | - `--trees`: The maximum number of trees in this model. 69 | - `--classes`: The number of classes for classification models. 70 | - `--features`: The number of features used for each sample. 71 | - `--samples`: The number of randomly-generated samples to use when training 72 | the example model. 73 | - `--threshold`: The threshold for classification decisions in classifier 74 | models. 75 | - `--predict_proba`: A flag indicating that class scores should be outputted 76 | instead of class IDs for classifiers. 77 | 78 | #### Triton server controls 79 | - `--batching_window`: Maximum time in microseconds for Triton to spend 80 | gathering samples for a single batch 81 | 82 | ### SKLearn and cuML models 83 | Note that this example script generates only the model pickle file for 84 | Scikit-Learn and cuML models. These must be converted to Treelite checkpoints 85 | as described in the [documentation for using these 86 | frameworks](https://github.com/triton-inference-server/fil_backend.git). An 87 | example invocation for Scikit-Learn is shown below: 88 | 89 | ```bash 90 | python qa/L0_e2e/generate_example_model.py --type sklearn --name skl_example 91 | ./scripts/convert_sklearn qa/L0_e2e/model_repository/skl_example/1/model.pkl 92 | ``` 93 | ## Testing example models 94 | Once you have generated an example model (or set up a real model), you can test 95 | it using the `qa/L0_e2e/test_model.py` script. After [starting the 96 | server](https://github.com/triton-inference-server/fil_backend#starting-the-server), 97 | the simplest invocation of this script is just: 98 | ```bash 99 | python qa/L0_e2e/test_model.py --name $NAME_OF_MODEL 100 | ``` 101 | This will run a number of randomly-generated samples through your model both in 102 | Triton and locally. The results will be compared to ensure they are the same. 103 | At the end of the run, some throughput and latency numbers will be printed to 104 | the terminal, but please note that these numbers are **not indicative of 105 | real-world throughput and latency performance**. This script is designed to 106 | rigorously test unlikely corner cases in ways which will hurt reported 107 | performance. The output statistics are provided merely to help catch 108 | performance regressions between different versions or deployments of Triton and 109 | are meaningful only when compared to other test runs with the same parameters. 110 | To get an accurate picture of model throughput and latency, use Triton's [Model 111 | Analyzer](https://github.com/triton-inference-server/model_analyzer) which 112 | includes an easy-to-use tool for meaningfully testing model performance. 113 | 114 | ### Additional arguments 115 | 116 | - `--name`: The name of the model to test. 117 | - `--repo`: The path to the model repository. If this script is not invoked 118 | from within the FIL backend Git repository, this option must be specified. It 119 | defaults to `qa/L0_e2e/model_repository`. 
120 | - `--host`: The URL for the Triton server. Defaults to `localhost`. 121 | - `--http_port`: If using a non-default HTTP port for Triton, the correct port 122 | can be specified here. 123 | - `--grpc_port`: If using a non-default GRPC port for Triton, the correct port 124 | can be specified here. 125 | - `--protocol`: While the test script will do brief tests of both HTTP and 126 | GRPC, the specified protocol will be used for more intensive testing. 127 | - `--samples`: The total number of samples to test for each batch size 128 | provided. Defaults to 8192. 129 | - `--batch_size`: This argument can take an arbitrary number of values. For 130 | each provided value, all samples will be broken down into batches of the 131 | given size and the model will be evaluated against all such batches. 132 | - `--shared_mem`: This argument can take up to two values. These values can be 133 | either `None` or `cuda` to indicate whether the tests should use no shared 134 | memory or CUDA shared memory. If both are given, tests will alternate between 135 | the two. Defaults to both. 136 | - `--concurrency`: The number of concurrent threads to use for generating 137 | requests. Higher values will provide a more rigorous test of the server's 138 | operation when processing many simultaneous requests. 139 | - `--timeout`: The longest to wait for all samples to be processed for a 140 | particular batch size. The appropriate value depends on your hardware, 141 | networking configuration, and total number of samples. 142 | - `--retries`: The number of times to retry requests in order to handle network 143 | failures. 144 | can be specified here. 145 | -------------------------------------------------------------------------------- /SKLearn_and_cuML.md: -------------------------------------------------------------------------------- 1 | # Scikit-Learn and cuML random forest support 2 | This page [has moved](https://github.com/triton-inference-server/fil_backend/blob/main/docs/sklearn_and_cuml.md). 3 | -------------------------------------------------------------------------------- /build_conda_env_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | REPODIR=$(cd $(dirname $0); pwd) 19 | 20 | NUMARGS=$# 21 | ARGS=$* 22 | VALIDTARGETS="conda-dev conda-test" 23 | VALIDFLAGS="-h --help" 24 | VALIDARGS="${VALIDTARGETS} ${VALIDFLAGS}" 25 | HELP="$0 [ ...] 
26 | where is: 27 | conda-dev - build container with dev Conda env 28 | conda-test - build container with test Conda env 29 | and is: 30 | -h - print this text 31 | 32 | The following environment variables are also accepted to allow further customization: 33 | CONDA_DEV_TAG - The tag to use for the image containing dev Conda env 34 | CONDA_TEST_TAG - The tag to use for the image containing test Conda env 35 | " 36 | 37 | export DOCKER_BUILDKIT=1 38 | 39 | function hasArg { 40 | (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") 41 | } 42 | 43 | if hasArg -h || hasArg --help || (( ${NUMARGS} == 0 )) 44 | then 45 | echo "${HELP}" 46 | exit 0 47 | fi 48 | 49 | if [ -z $CONDA_DEV_TAG ] 50 | then 51 | CONDA_DEV_TAG='triton_fil_dev_conda' 52 | fi 53 | if [ -z $CONDA_TEST_TAG ] 54 | then 55 | CONDA_TEST_TAG='triton_fil_test_conda' 56 | fi 57 | 58 | BUILD_CONDA_DEV=0 59 | BUILD_CONDA_TEST=0 60 | if hasArg conda-dev 61 | then 62 | BUILD_CONDA_DEV=1 63 | elif hasArg conda-test 64 | then 65 | BUILD_CONDA_TEST=1 66 | fi 67 | 68 | if [ $BUILD_CONDA_DEV -eq 1 ] 69 | then 70 | docker build \ 71 | $DOCKER_ARGS \ 72 | --target conda-dev \ 73 | -t "$CONDA_DEV_TAG" \ 74 | -f ops/Dockerfile \ 75 | $REPODIR 76 | fi 77 | 78 | if [ $BUILD_CONDA_TEST -eq 1 ] 79 | then 80 | docker build \ 81 | $DOCKER_ARGS \ 82 | --target base-test-install \ 83 | -t "$CONDA_TEST_TAG" \ 84 | -f ops/Dockerfile \ 85 | $REPODIR 86 | fi 87 | -------------------------------------------------------------------------------- /ci/gitlab/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # ENVIRONMENT VARIABLE OPTIONS 6 | # PREBUILT_SERVER_TAG: The tag of the prebuilt Triton server image to test 7 | # PREBUILT_TEST_TAG: The tag of the prebuilt test image to run tests in 8 | # MODEL_BUILDER_IMAGE: A Docker image to be used for training test models 9 | # LOG_DIR: Host directory for storing logs 10 | # NV_DOCKER_ARGS: A bash expression that (when evaluated) returns Docker 11 | # arguments for controlling GPU access 12 | # BUILDPY: 1 to use Triton's build.py script for server build 13 | # CPU_ONLY: 1 to build without GPU support 14 | # NO_CACHE: 0 to enable Docker cache during build 15 | # USE_CLIENT_WHEEL: 1 to install Triton client from wheel for tests 16 | # SDK_IMAGE: If set, copy client wheel from this SDK image 17 | 18 | REPO_DIR=$(cd $(dirname $0)/../../; pwd) 19 | BUILDPY=${BUILDPY:-0} 20 | CPU_ONLY=${CPU_ONLY:-0} 21 | NO_CACHE=${NO_CACHE:-1} 22 | 23 | if [ -z $CI_COMMIT_BRANCH ] 24 | then 25 | export BUILDPY_BRANCH="$CI_COMMIT_BRANCH" 26 | fi 27 | 28 | # Check if test or base images need to be built and do so if necessary 29 | if [ -z $PREBUILT_SERVER_TAG ] 30 | then 31 | export SERVER_TAG=triton_fil 32 | else 33 | export PREBUILT_IMAGE="$PREBUILT_SERVER_TAG" 34 | export SERVER_TAG="$PREBUILT_SERVER_TAG" 35 | fi 36 | [ -z $TRITON_SERVER_REPO_TAG ] || export TRITON_REF="$TRITON_SERVER_REPO_TAG" 37 | [ -z $TRITON_COMMON_REPO_TAG ] || export COMMON_REF="$TRITON_COMMON_REPO_TAG" 38 | [ -z $TRITON_CORE_REPO_TAG ] || export CORE_REF="$TRITON_CORE_REPO_TAG" 39 | [ -z $TRITON_BACKEND_REPO_TAG ] || export BACKEND_REF="$TRITON_BACKEND_REPO_TAG" 40 | 41 | if [ -z $PREBUILT_TEST_TAG ] 42 | then 43 | export TEST_TAG=triton_fil_test 44 | echo "Building Docker images..." 
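# Assemble build.sh flags from the environment-variable options documented at the top of this script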
45 | if [ $BUILDPY -eq 1 ] 46 | then 47 | BUILDARGS='--buildpy' 48 | else 49 | BUILDARGS='' 50 | fi 51 | if [ $CPU_ONLY -eq 1 ] 52 | then 53 | BUILDARGS="$BUILDARGS --cpu-only" 54 | fi 55 | if [ $NO_CACHE -eq 1 ] 56 | then 57 | BUILDARGS="$BUILDARGS --no-cache" 58 | fi 59 | if [ ! -z $SDK_IMAGE ] 60 | then 61 | USE_CLIENT_WHEEL=1 62 | export SDK_IMAGE="${SDK_IMAGE}" 63 | fi 64 | if [ ! -z $USE_CLIENT_WHEEL ] 65 | then 66 | export USE_CLIENT_WHEEL="${USE_CLIENT_WHEEL}" 67 | fi 68 | $REPO_DIR/build.sh $BUILDARGS 69 | else 70 | export TEST_TAG="$PREBUILT_TEST_TAG" 71 | fi 72 | 73 | MODEL_BUILDER_IMAGE=${MODEL_BUILDER_IMAGE:-${TEST_TAG}} 74 | 75 | # Set up directory for logging 76 | if [ -z $LOG_DIR ] 77 | then 78 | LOG_DIR="qa/logs" 79 | else 80 | LOG_DIR="$(readlink -f $LOG_DIR)" 81 | fi 82 | if [ ! -d "${LOG_DIR}" ] 83 | then 84 | mkdir -p "${LOG_DIR}" 85 | fi 86 | 87 | if [ -z "$NV_DOCKER_ARGS" ] 88 | then 89 | if [ -z $CUDA_VISIBLE_DEVICES ] 90 | then 91 | GPU_DOCKER_ARGS='--gpus all' 92 | else 93 | GPU_DOCKER_ARGS='--gpus $CUDA_VISIBLE_DEVICES' 94 | fi 95 | else 96 | GPU_DOCKER_ARGS="$(eval ${NV_DOCKER_ARGS} || echo -n '')" 97 | fi 98 | 99 | if [ ! -z $RUNNER_ID ] 100 | then 101 | DOCKER_ARGS="$DOCKER_ARGS --label RUNNER_ID=${RUNNER_ID}" 102 | fi 103 | 104 | echo "Generating example models..." 105 | # Use 'docker cp' instead of mounting, because we cannot mount directories 106 | # from the GitLab runner due to the "Docker-outside-of-Docker" architecture. 107 | # See https://confluence.nvidia.com/pages/viewpage.action?spaceKey=DL&title=GitLab+Runner 108 | # for more details. 109 | MODEL_BUILDER_INST=model_builder_inst_${CI_JOB_ID} 110 | docker create -t --name ${MODEL_BUILDER_INST} \ 111 | -e RETRAIN=1 \ 112 | -e OWNER_ID=$(id -u) \ 113 | -e OWNER_GID=$(id -g) \ 114 | $GPU_DOCKER_ARGS \ 115 | $DOCKER_ARGS \ 116 | $MODEL_BUILDER_IMAGE \ 117 | bash 118 | docker start ${MODEL_BUILDER_INST} 119 | docker exec ${MODEL_BUILDER_INST} bash -c 'mkdir -p /qa/L0_e2e/ && mkdir -p /qa/logs/' 120 | mkdir -p qa/L0_e2e/model_repository/ 121 | mkdir -p qa/L0_e2e/cpu_model_repository/ 122 | docker cp qa/L0_e2e/model_repository/ ${MODEL_BUILDER_INST}:/qa/L0_e2e/ 123 | docker cp qa/L0_e2e/cpu_model_repository/ ${MODEL_BUILDER_INST}:/qa/L0_e2e/ 124 | 125 | docker exec \ 126 | ${MODEL_BUILDER_INST} \ 127 | bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh' 128 | 129 | docker cp ${MODEL_BUILDER_INST}:/qa/L0_e2e/model_repository/ qa/L0_e2e/ 130 | docker cp ${MODEL_BUILDER_INST}:/qa/L0_e2e/cpu_model_repository/ qa/L0_e2e/ 131 | docker cp ${MODEL_BUILDER_INST}:/qa/logs/. "${LOG_DIR}" 132 | docker stop ${MODEL_BUILDER_INST} 133 | docker rm ${MODEL_BUILDER_INST} 134 | 135 | if [ $CPU_ONLY -eq 1 ] 136 | then 137 | DOCKER_ARGS="${DOCKER_ARGS} -e TRITON_ENABLE_GPU=OFF" 138 | else 139 | DOCKER_ARGS="${DOCKER_ARGS} ${GPU_DOCKER_ARGS}" 140 | fi 141 | 142 | echo "Running tests..." 143 | TEST_INST=test_inst_${CI_JOB_ID} 144 | docker create -t --name ${TEST_INST} \ 145 | -e TEST_PROFILE=ci \ 146 | $DOCKER_ARGS \ 147 | $TEST_TAG \ 148 | bash 149 | docker start ${TEST_INST} 150 | docker exec ${TEST_INST} bash -c 'mkdir -p /qa/L0_e2e/ && mkdir -p /qa/logs/' 151 | docker cp qa/L0_e2e/model_repository/ ${TEST_INST}:/qa/L0_e2e/ 152 | docker cp qa/L0_e2e/cpu_model_repository/ ${TEST_INST}:/qa/L0_e2e/ 153 | docker exec ${TEST_INST} bash -c 'source /conda/test/bin/activate && /qa/entrypoint.sh' 154 | 155 | docker cp ${TEST_INST}:/qa/logs/. 
"${LOG_DIR}" 156 | docker stop ${TEST_INST} 157 | docker rm ${TEST_INST} 158 | -------------------------------------------------------------------------------- /ci/local/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # ENVIRONMENT VARIABLE OPTIONS 6 | # RETRAIN: 1 to force retraining of existing models, 0 to use existing models 7 | # if available 8 | # USE_CLIENT_WHEEL: 1 to install Triton client from wheel for tests 9 | # SDK_IMAGE: If set, copy client wheel from this SDK image 10 | # HOST_BUILD: 1 to build backend lib on host and use it in tests 11 | 12 | REPO_DIR=$(cd $(dirname $0)/../../; pwd) 13 | QA_DIR="${REPO_DIR}/qa" 14 | MODEL_DIR="${QA_DIR}/L0_e2e/model_repository" 15 | CPU_MODEL_DIR="${QA_DIR}/L0_e2e/cpu_model_repository" 16 | HOST_BUILD="${HOST_BUILD:-0}" 17 | TEST_PROFILE="${TEST_PROFILE:-dev}" 18 | 19 | export SERVER_TAG=triton_fil 20 | export TEST_TAG=triton_fil_test 21 | 22 | if [ ! -z $SDK_IMAGE ] 23 | then 24 | export SDK_IMAGE="${SDK_IMAGE}" 25 | USE_CLIENT_WHEEL=1 26 | fi 27 | if [ ! -z $USE_CLIENT_WHEEL ] 28 | then 29 | export USE_CLIENT_WHEEL="${USE_CLIENT_WHEEL}" 30 | fi 31 | 32 | BUILD_ARGS='' 33 | if [ $HOST_BUILD -eq 1 ] 34 | then 35 | BUILD_ARGS="$BUILD_ARGS --host" 36 | fi 37 | 38 | echo "Building Docker images..." 39 | $REPO_DIR/build.sh $BUILD_ARGS 40 | 41 | DOCKER_ARGS="-t -v ${QA_DIR}/logs:/qa/logs" 42 | 43 | if [ -z $CUDA_VISIBLE_DEVICES ] 44 | then 45 | DOCKER_ARGS="$DOCKER_ARGS --gpus all" 46 | else 47 | DOCKER_ARGS="$DOCKER_ARGS --gpus $CUDA_VISIBLE_DEVICES" 48 | fi 49 | 50 | echo "Generating example models..." 51 | docker run \ 52 | -e RETRAIN=${RETRAIN:-0} \ 53 | -e OWNER_ID=$(id -u) \ 54 | -e OWNER_GID=$(id -g) \ 55 | -e TEST_PROFILE=$TEST_PROFILE \ 56 | $DOCKER_ARGS \ 57 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 58 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 59 | --rm $TEST_TAG \ 60 | bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh' 61 | 62 | echo "Running GPU-enabled tests..." 63 | docker run \ 64 | $DOCKER_ARGS \ 65 | -e TEST_PROFILE=$TEST_PROFILE \ 66 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 67 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 68 | --rm $TEST_TAG 69 | 70 | export SERVER_TAG=triton_fil:cpu 71 | export TEST_TAG=triton_fil_test:cpu 72 | 73 | echo "Building CPU-only Docker images..." 74 | $REPO_DIR/build.sh $BUILD_ARGS --cpu-only 75 | 76 | echo "Running CPU-only tests..." 77 | docker run \ 78 | $DOCKER_ARGS \ 79 | -e TRITON_ENABLE_GPU=OFF \ 80 | -e TEST_PROFILE=$TEST_PROFILE \ 81 | -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \ 82 | -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \ 83 | --rm $TEST_TAG 84 | -------------------------------------------------------------------------------- /cmake/modules/ConfigureCUDA.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2018-2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | if(DISABLE_DEPRECATION_WARNINGS) 18 | list(APPEND RAPIDS_TRITON_CXX_FLAGS -Wno-deprecated-declarations) 19 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) 20 | endif() 21 | 22 | if(CMAKE_COMPILER_IS_GNUCXX) 23 | list(APPEND RAPIDS_TRITON_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) 24 | endif() 25 | 26 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) 27 | 28 | # set warnings as errors 29 | if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) 30 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Werror=all-warnings) 31 | endif() 32 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) 33 | 34 | # Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking 35 | if(CUDA_ENABLE_LINEINFO) 36 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -lineinfo) 37 | endif() 38 | 39 | # Debug options 40 | if(CMAKE_BUILD_TYPE MATCHES Debug) 41 | message(VERBOSE "RAPIDS_TRITON: Building with debugging flags") 42 | list(APPEND RAPIDS_TRITON_CUDA_FLAGS -G -Xcompiler=-rdynamic) 43 | endif() 44 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_cuml.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(find_and_configure_cuml) 18 | 19 | set(oneValueArgs VERSION FORK PINNED_TAG USE_TREELITE_STATIC) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | set(CUML_ALGORITHMS "FIL" CACHE STRING "List of algorithms to build in cuml") 24 | list(APPEND CUML_ALGORITHMS "TREESHAP") 25 | 26 | rapids_cpm_find(cuml ${PKG_VERSION} 27 | GLOBAL_TARGETS cuml++ 28 | BUILD_EXPORT_SET rapids_triton-exports 29 | INSTALL_EXPORT_SET rapids_triton-exports 30 | CPM_ARGS 31 | GIT_REPOSITORY https://github.com/${PKG_FORK}/cuml.git 32 | GIT_TAG ${PKG_PINNED_TAG} 33 | SOURCE_SUBDIR cpp 34 | OPTIONS 35 | "BUILD_CUML_C_LIBRARY OFF" 36 | "BUILD_CUML_CPP_LIBRARY ON" 37 | "BUILD_CUML_TESTS OFF" 38 | "BUILD_PRIMS_TESTS OFF" 39 | "BUILD_CUML_MG_TESTS OFF" 40 | "BUILD_CUML_EXAMPLES OFF" 41 | "BUILD_CUML_BENCH OFF" 42 | "BUILD_CUML_PRIMS_BENCH OFF" 43 | "BUILD_CUML_STD_COMMS OFF" 44 | "BUILD_SHARED_LIBS ON" 45 | "CUML_USE_TREELITE_STATIC ${PKG_USE_TREELITE_STATIC}" 46 | "USE_CCACHE ON" 47 | "RAFT_COMPILE_LIBRARIES OFF" 48 | "RAFT_ENABLE_NN_DEPENDENCIES OFF" 49 | ) 50 | 51 | message(VERBOSE "RAPIDS_TRITON: Using CUML located in ${cuml_SOURCE_DIR}") 52 | 53 | endfunction() 54 | 55 | # Change pinned tag here to test a commit in CI 56 | # To use a different RAFT locally, set the CMake variable 57 | # CPM_raft_SOURCE=/path/to/local/raft 58 | find_and_configure_cuml(VERSION 25.04 59 | FORK rapidsai 60 | PINNED_TAG branch-25.04 61 | USE_TREELITE_STATIC ${TRITON_FIL_USE_TREELITE_STATIC} 62 | ) 63 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_gtest.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(find_and_configure_gtest VERSION) 18 | 19 | if(TARGET GTest::gtest) 20 | return() 21 | endif() 22 | 23 | rapids_cpm_find(GTest ${VERSION} 24 | GLOBAL_TARGETS gtest gtest_main GTest::gtest GTest::gtest_main gmock gmock_main 25 | CPM_ARGS 26 | GIT_REPOSITORY https://github.com/google/googletest.git 27 | GIT_TAG release-${VERSION} 28 | GIT_SHALLOW TRUE 29 | OPTIONS "INSTALL_GTEST OFF" 30 | # googletest >= 1.10.0 provides a cmake config file -- use it if it exists 31 | FIND_PACKAGE_ARGUMENTS "CONFIG" 32 | ) 33 | 34 | if(NOT TARGET GTest::gtest) 35 | add_library(GTest::gtest ALIAS gtest) 36 | add_library(GTest::gtest_main ALIAS gtest_main) 37 | endif() 38 | 39 | endfunction() 40 | 41 | set(RAFT_MIN_VERSION_gtest 1.10.0) 42 | 43 | find_and_configure_gtest(${RAFT_MIN_VERSION_gtest}) 44 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_rapids-triton.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021-2024, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_and_configure_rapids_triton) 18 | 19 | set(oneValueArgs VERSION FORK PINNED_TAG) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | rapids_cpm_find(rapids_triton ${PKG_VERSION} 24 | GLOBAL_TARGETS rapids_triton::rapids_triton 25 | BUILD_EXPORT_SET rapids_triton-exports 26 | INSTALL_EXPORT_SET rapids_triton-exports 27 | CPM_ARGS 28 | GIT_REPOSITORY ${PKG_FORK} 29 | GIT_TAG ${PKG_PINNED_TAG} 30 | SOURCE_SUBDIR cpp 31 | OPTIONS 32 | "BUILD_TESTS OFF" 33 | "BUILD_EXAMPLE OFF" 34 | ) 35 | endfunction() 36 | 37 | find_and_configure_rapids_triton(VERSION ${RAPIDS_DEPENDENCIES_VERSION} 38 | FORK ${RAPIDS_TRITON_REPO_PATH} 39 | PINNED_TAG ${RAPIDS_TRITON_REPO_TAG} 40 | ) 41 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_treelite.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2021-2022, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_and_configure_treelite) 18 | 19 | set(oneValueArgs VERSION PINNED_TAG BUILD_STATIC_LIBS) 20 | cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" 21 | "${multiValueArgs}" ${ARGN} ) 22 | 23 | message(VERBOSE "CUML: In treelite func, static: ${PKG_BUILD_STATIC_LIBS}") 24 | if(NOT PKG_BUILD_STATIC_LIBS) 25 | list(APPEND TREELITE_LIBS treelite::treelite) 26 | else() 27 | list(APPEND TREELITE_LIBS treelite::treelite_static) 28 | endif() 29 | 30 | rapids_cpm_find(Treelite ${PKG_VERSION} 31 | GLOBAL_TARGETS ${TREELITE_LIBS} 32 | INSTALL_EXPORT_SET cuml-exports 33 | CPM_ARGS 34 | GIT_REPOSITORY https://github.com/dmlc/treelite.git 35 | GIT_TAG ${PKG_PINNED_TAG} 36 | OPTIONS 37 | "USE_OPENMP ON" 38 | "Treelite_BUILD_STATIC_LIBS ${PKG_BUILD_STATIC_LIBS}" 39 | ) 40 | 41 | 42 | list(APPEND TREELITE_LIBS_NO_PREFIX treelite) 43 | if(Treelite_ADDED AND PKG_BUILD_STATIC_LIBS) 44 | list(APPEND TREELITE_LIBS_NO_PREFIX treelite_static) 45 | endif() 46 | 47 | set(Treelite_ADDED ${Treelite_ADDED} PARENT_SCOPE) 48 | set(TREELITE_LIBS ${TREELITE_LIBS} PARENT_SCOPE) 49 | 50 | if(Treelite_ADDED) 51 | if (NOT PKG_BUILD_STATIC_LIBS) 52 | target_include_directories(treelite 53 | PUBLIC $ 54 | $) 55 | if(NOT TARGET treelite::treelite) 56 | add_library(treelite::treelite ALIAS treelite) 57 | endif() 58 | else() 59 | target_include_directories(treelite_static 60 | PUBLIC $ 61 | $) 62 | if(NOT TARGET treelite::treelite_static) 63 | add_library(treelite::treelite_static ALIAS treelite_static) 64 | endif() 65 | endif() 66 | 67 | rapids_export(BUILD Treelite 68 | EXPORT_SET TreeliteTargets 69 | GLOBAL_TARGETS ${TREELITE_LIBS_NO_PREFIX} 70 | NAMESPACE treelite::) 71 | endif() 72 | 73 | # We generate the treelite-config files when we built treelite locally, so always do `find_dependency` 74 | rapids_export_package(BUILD Treelite cuml-exports) 75 | 76 | # Tell cmake where it can find the generated treelite-config.cmake we wrote. 
77 | include("${rapids-cmake-dir}/export/find_package_root.cmake") 78 | rapids_export_find_package_root(BUILD Treelite [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports) 79 | endfunction() 80 | 81 | find_and_configure_treelite(VERSION 4.4.1 82 | PINNED_TAG 386bd0de99f5a66584c7e58221ee38ce606ad1ae 83 | BUILD_STATIC_LIBS ${TRITON_FIL_USE_TREELITE_STATIC}) 84 | -------------------------------------------------------------------------------- /conda/environments/buildpy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_buildpy 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - docker-py 7 | - python 8 | - distro 9 | -------------------------------------------------------------------------------- /conda/environments/rapids_triton_dev.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: rapids_triton_dev 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - ccache 7 | - cmake>=4.0 8 | - ninja 9 | - python 10 | # TODO(hcho3): Remove the pin when 11 | # https://github.com/triton-inference-server/common/pull/114 is merged 12 | - rapidjson>=1.1.0,<1.1.0.post* 13 | -------------------------------------------------------------------------------- /conda/environments/triton_benchmark.yml: -------------------------------------------------------------------------------- 1 | name: triton_benchmark 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - cuda-version=12.8 8 | - cudf=25.04 9 | - libcusolver 10 | - libcusparse 11 | - matplotlib 12 | - pip 13 | - python 14 | - scipy 15 | - pip: 16 | - tritonclient[all] 17 | - protobuf 18 | - git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 19 | -------------------------------------------------------------------------------- /conda/environments/triton_test.yml: -------------------------------------------------------------------------------- 1 | name: triton_test 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - aws-sdk-cpp 8 | - clang-tools=19.1.7 9 | - cuda-version=12.8 10 | - cudf=25.04 11 | - cuml=25.04 12 | - flake8 13 | - hypothesis 14 | - lightgbm 15 | - matplotlib 16 | - pip 17 | - pytest 18 | - python 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - treelite>=4.4 22 | - pip: 23 | - tritonclient[all] 24 | - protobuf 25 | - git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 26 | -------------------------------------------------------------------------------- /conda/environments/triton_test_no_client.yml: -------------------------------------------------------------------------------- 1 | name: triton_test 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - aws-sdk-cpp 8 | - clang-tools=19.1.7 9 | - cuda-version=12.8 10 | - cudf=25.04 11 | - cuml=25.04 12 | - flake8 13 | - hypothesis 14 | - lightgbm 15 | - pip 16 | - pytest 17 | - python 18 | - python-rapidjson 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - treelite>=4.4 22 | -------------------------------------------------------------------------------- /docs/build.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Building the FIL Backend 30 | Triton backends are implemented as shared libraries which are conditionally 31 | loaded by the main Triton server process. 
To build the FIL backend shared 32 | library or simply to create a Docker image with a fresh build of the backend, 33 | you may follow the indicated steps. 34 | 35 | **Note**: Most users will not need to build their own copy of the FIL backend. 36 | These instructions are intended for developers and those who wish to make 37 | custom tweaks to the backend. If you are just looking for install instructions, 38 | follow our [installation guide](docs/install.md). 39 | 40 | ## Prerequisites 41 | The FIL backend may be built either using Docker or on the host. We 42 | recommend using the Dockerized build in order to simplify dependency management 43 | unless you have a specific need to build on the host. 44 | 45 | ### Dockerized Build 46 | - [Docker](https://docs.docker.com/get-docker/) 47 | - [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 48 | 49 | ### Host Build 50 | - [CUDA toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) (Only required for GPU-enabled builds) 51 | - [CMake](https://cmake.org/install/) 52 | - [Ninja](https://ninja-build.org/) (Optional but recommended) 53 | Except for the CUDA toolkit, these dependencies can be installed via conda using the provided 54 | [environment 55 | file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml): 56 | 57 | ```bash 58 | conda env create -f conda/environments/rapids_triton_dev.yml 59 | conda activate rapids_triton_dev 60 | ``` 61 | 62 | 63 | ## Using the Build Script 64 | To simplify the build process, the FIL backend provides a `build.sh` script at 65 | the root of the repo. For most use cases, it is sufficient to simply 66 | invoke the script: 67 | 68 | ```bash 69 | ./build.sh 70 | ``` 71 | 72 | This is a lightweight wrapper around a `docker build` command which helps 73 | provide the correct build arguments and variables. By default, it will build 74 | *both* a "server" image which is equivalent to the usual Triton Docker image 75 | and a "test" image whose entrypoint will invoke the FIL backend's tests. 76 | 77 | ### Build Options 78 | The build script uses a number of flags and environment variables to 79 | control the details of what gets built and how. These options are 80 | summarized below: 81 | 82 | #### Flags 83 | - `-g`: Perform a debug build 84 | - `-h`: Print help test for build script 85 | - `--cpu-only`: Build CPU-only version of library 86 | - `--tag-commit`: Tag Docker images using the current git commit 87 | - `--no-cache`: Disable Docker cache for this build 88 | - `--host`: Build on host, **not** in Docker 89 | - `--buildpy`: Invoke Triton's `build.py` script to perform build. 90 | **Note:** This is **not** recommended for end-users. It is included 91 | primarily for testing compatibility with upstream build changes. If you must 92 | invoke this option, you will need the dependencies indicated in the 93 | associated conda [environment file](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/buildpy.yml). 
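For example, several of the flags above can be combined in a single invocation. The following sketch (the particular combination is chosen purely for illustration) performs a debug, CPU-only build with the Docker cache disabled and tags the resulting images with the current git commit:

```bash
# Illustrative flag combination: debug, CPU-only build, no Docker cache,
# images tagged with the current git commit
./build.sh -g --cpu-only --no-cache --tag-commit
```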
94 | 95 | #### Environment variables 96 | ##### Standard options 97 | - `BASE_IMAGE`: The base image for Docker images or the build image for 98 | `build.py` if `--buildpy` is invoked 99 | - `TRITON_VERSION`: The version of Triton to use for this build 100 | - `SERVER_TAG`: The tag to use for the server image 101 | - `TEST_TAG`: The tag to use for the test image 102 | - `PREBUILT_IMAGE`: An existing Triton Docker image which you would like to 103 | run tests against. This will build the test image on top of the indicated 104 | image. 105 | - `RAPIDS_VERSION`: The version of RAPIDS to require for RAPIDS 106 | dependencies 107 | ##### Advanced options 108 | - `USE_CLIENT_WHEEL`: If 1, the Triton Python client will be 109 | installed from a wheel distributed in the Triton SDK Docker image. This 110 | option is useful for ARM development, since the Triton client cannot 111 | currently be installed via `pip` for ARM. 112 | - `SDK_IMAGE`: If set, this image will be used to provide the 113 | Python client wheel. Otherwise, if `USE_CLIENT_WHEEL` is set to 1 and this 114 | variable is unset, the image will be selected based on the Triton 115 | version. 116 | - `CONDA_DEV_TAG`: A Docker image containing the development conda 117 | environment. Used primarily to speed up CI; rarely invoked during 118 | development. 119 | - `CONDA_TEST_TAG`: A Docker image containing the test conda 120 | environment. Used primarily to speed up CI; rarely invoked during development 121 | - `TRITON_REF`: The commit ref for the Triton server repo when using 122 | `--buildpy` 123 | - `CORE_REF`: The commit ref for the Triton core repo when using 124 | `--buildpy` 125 | - `COMMON_REF`: The commit ref for the Triton common repo when using 126 | `--buildpy` 127 | - `BACKEND_REF`: The commit ref for the Triton backend repo when using 128 | `--buildpy` 129 | - `THIRDPARTY_REF`: The commit ref for the Triton third-party repo when using 130 | `--buildpy` 131 | - `JOB_ID`: Used for CI builds to uniquely identify a particular 132 | build job. 133 | - `BUILDPY_BRANCH`: Use this branch of the Triton server repo to 134 | provide the `build.py` script if `--buildpy` is used. 135 | - `TREELITE_STATIC`: if set to `ON`, Treelite will be statically linked into the built binaries 136 | -------------------------------------------------------------------------------- /docs/explainability.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Model Explainability with Shapley Values 30 | 31 | **NOTE: The CPU version of this feature is in an experimental state as of version 23.04** 32 | 33 | In addition to providing model output from forest models, the FIL backend 34 | can help you understand *why* the model came to a particular conclusion by 35 | providing Shapley values. Shapley values offer a measure of the extent to 36 | which individual features in an input contributed to the final model output. 37 | Features with high Shapley value scores can generally be understood to be more 38 | important to the model's conclusion than those with lower scores. 39 | 40 | Generally speaking, Shapley values are computed by computing the model output 41 | with and without a particular feature input and looking at how much the output 42 | changed. This is referred to as the marginal contribution of that 43 | feature. 
For a more complete understanding, check out the [Wikipedia 44 | article](https://en.wikipedia.org/wiki/Shapley_value) on Shapley values or 45 | Lloyd Shapley's [original 46 | paper](https://www.rand.org/content/dam/rand/pubs/research_memoranda/2008/RM670.pdf). 47 | 48 | **NOTE: Tree depth is limited to 32 for shapley value computation. Tree models with higher depth will throw an error.** 49 | 50 | ## Using Shapley Values in the FIL Backend 51 | Because it takes additional time to compute and return the relatively large 52 | output arrays for Shapley values, Shapley value computation is turned off by 53 | default in the FIL backend. 54 | 55 | To turn on Shapley Value support, you must add an additional output to the 56 | `config.pbtxt` file for your model as shown below: 57 | ```protobuf 58 | output [ 59 | { 60 | name: "output__0" 61 | data_type: TYPE_FP32 62 | dims: [ 2 ] 63 | }, 64 | { 65 | name: "treeshap_output" 66 | data_type: TYPE_FP32 67 | dims: [ 501 ] 68 | } 69 | ] 70 | backend: "fil" 71 | max_batch_size: 32768 72 | input [ 73 | { 74 | name: "input__0" 75 | data_type: TYPE_FP32 76 | dims: [ $NUM_FEATURES ] 77 | } 78 | ] 79 | output [ 80 | { 81 | name: "output__0" 82 | data_type: TYPE_FP32 83 | dims: [ 1 ] 84 | }, 85 | { 86 | name: "treeshap_output" 87 | data_type: TYPE_FP32 88 | dims: [ $NUM_FEATURES_PLUS_ONE ] 89 | } 90 | ] 91 | instance_group [{ kind: KIND_AUTO }] 92 | parameters [ 93 | { 94 | key: "model_type" 95 | value: { string_value: "$MODEL_TYPE" } 96 | }, 97 | { 98 | key: "output_class" 99 | value: { string_value: "$IS_A_CLASSIFIER" } 100 | } 101 | ] 102 | 103 | dynamic_batching {} 104 | ``` 105 | Note that the length of the `treeshap_output` is equal to the number of input 106 | features plus one to account for the bias term in the Shapley output. For a 107 | working example of model deployment with Shapley values, including how to 108 | retrieve those values using Triton's Python client, check out the [FAQ 109 | Notebook](https://nbviewer.org/github/triton-inference-server/fil_backend/blob/main/notebooks/faq/FAQs.ipynb#$\color{#76b900}{\text{FAQ-12:-How-do-I-retrieve-Shapley-values-for-model-explainability?}}$) 110 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Installation 30 | The FIL backend is a part of Triton and can be installed via the methods 31 | described in the [main Triton 32 | documentation](https://github.com/triton-inference-server/server#build-and-deploy). 33 | To quickly get up and running with a Triton Docker image, follow these 34 | steps. 35 | 36 | **Note**: Looking for instructions to *build* the FIL backend yourself? Check out our [build 37 | guide](build.md). 38 | 39 | ## Prerequisites 40 | - [Docker](https://docs.docker.com/get-docker/) 41 | - [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 42 | 43 | ## Getting the container 44 | Triton containers are available from NGC and may be pulled down via 45 | 46 | ```bash 47 | docker pull nvcr.io/nvidia/tritonserver:22.10-py3 48 | ``` 49 | 50 | Note that the FIL backend cannot be used in the `21.06` version of this 51 | container; the `21.06.1` patch release is the earliest Triton version with a 52 | working FIL backend implementation. 
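If you would like to confirm that the image you pulled actually includes the FIL backend before going further, you can list the backend directories bundled in the container. This is just a quick sanity check; the path below follows the standard Triton container layout, and the tag should match whichever release you pulled:

```bash
# List the bundled backends; the output should include a `fil` entry
docker run --rm nvcr.io/nvidia/tritonserver:22.10-py3 ls /opt/tritonserver/backends
```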
53 | 54 | ## Starting the container 55 | In order to actually deploy a model, you will need to provide the serialized 56 | model and configuration file in a specially-structured directory called the 57 | "model repository." Check out the 58 | [configuration guide](docs/model_config.md) for details on how to do this for your model. 59 | 60 | Assuming your model repository is on the host system, you can 61 | bind-mount it into the container and start the server via the following 62 | command: 63 | ``` 64 | docker run --gpus all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${MODEL_REPO}:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.11-py3 tritonserver --model-repository=/models 65 | ``` 66 | Remember that bind-mounts **require an absolute path** to the host 67 | directory, so `${MODEL_REPO}` should be replaced by the absolute path to the 68 | model repository directory on the host. 69 | 70 | Assuming you started your container with the name "tritonserver" as in the 71 | above snippet, you can bring the server down again and remove the 72 | container with: 73 | ``` 74 | docker rm -f tritonserver 75 | ``` 76 | -------------------------------------------------------------------------------- /docs/model_support.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Model Support and Limitations 30 | The FIL backend is designed to accelerate inference for **tree-based models**. 31 | If the model you are trying to deploy is not tree-based, consider using one of 32 | Triton's other backends. 33 | 34 | ## Frameworks 35 | The FIL backend supports most XGBoost and LightGBM models using their native 36 | serialization formats. The FIL backend also supports the following model types 37 | from [Scikit-Learn and cuML](sklearn_and_cuml.md) using Treelite's checkpoint serialization format: 38 | 39 | - GradientBoostingClassifier 40 | - GradientBoostingRegressor 41 | - IsolationForest 42 | - RandomForestRegressor 43 | - ExtraTreesClassifier 44 | - ExtraTreesRegressor 45 | 46 | In addition, the FIL backend can perform inference on tree models from any 47 | framework if they are first exported to Treelite's checkpoint serialization 48 | format. 49 | 50 | ## Serialization Formats 51 | The FIL backend currently supports the following serialization formats: 52 | 53 | - XGBoost JSON 54 | - XGBoost UBJSON 55 | - XGBoost Binary 56 | - LightGBM Text 57 | - Treelite binary checkpoint 58 | 59 | The FIL backend does **not** support direct ingestion of Pickle files. The 60 | pickled model must be converted to one of the above formats before it can be 61 | used in Triton. 62 | 63 | ## Version Compatibility 64 | Until version 3.0 of Treelite, Treelite offered no backward compatibility 65 | for its checkpoint format even among minor releases. Therefore, the version 66 | of Treelite used to save a checkpoint had to exactly match the version used in 67 | the FIL backend. Starting with version 3.0, Treelite supports checkpoint 68 | output from any version of Treelite starting with 2.7 until the next major 69 | release. 70 | 71 | XGBoost's JSON format also changes periodically between minor versions, and 72 | older versions of Treelite used in the FIL backend may not support those 73 | changes. 
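When in doubt about which format your training code produced, it is often simplest to re-save the model explicitly in one of the supported formats. The snippet below is an illustrative sketch using the XGBoost Python API on toy data; the file name `xgboost.json` matches the default name used for the `xgboost_json` model type elsewhere in this repository.

```python
import numpy as np
import xgboost as xgb

# Toy data purely for illustration.
X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(np.int32)

booster = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y))

# XGBoost chooses the serialization format from the file extension:
# ".json" writes XGBoost JSON and ".ubj" writes XGBoost UBJSON, both of
# which the FIL backend accepts (subject to the version matrix below).
booster.save_model("xgboost.json")
```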
74 | 75 | The compatibility matrix for Treelite and XGBoost with the FIL backend is 76 | shown below: 77 | 78 | | Triton Version | Supported Treelite Version(s) | Supported XGBoost models | 79 | | -------------- | ----------------------------- | -------------------------------------- | 80 | | 21.08 | 1.3.0 | XGBoost JSON <1.6 | 81 | | 21.09-21.10 | 2.0.0 | XGBoost JSON <1.6 | 82 | | 21.11-22.02 | 2.1.0 | XGBoost JSON <1.6 | 83 | | 22.03-22.06 | 2.3.0 | XGBoost JSON <1.6 | 84 | | 22.07 | 2.4.0 | XGBoost JSON <1.7 | 85 | | 22.08-24.02 | 2.4.0; >=3.0.0,<4.0.0 | XGBoost JSON <1.7 | 86 | | 24.03+ | 3.9.0; >=4.0.0,<5.0.0 | XGBoost JSON 1.7+ | 87 | | 24.10+ | 3.9.0; >=4.0.0,<5.0.0 | XGBoost JSON 1.7+, XGBoost UBJSON 2.1+ | 88 | 89 | ## Limitations 90 | The FIL backend currently does not support any multi-output regression models. 91 | 92 | ## Double-Precision Support 93 | While the FIL backend can load double-precision models, it performs all 94 | computations in single-precision mode. This can lead to slight differences in 95 | model output for frameworks like LightGBM which natively use double precision. 96 | Support for double-precision execution is planned for an upcoming release. 97 | 98 | ## Categorical Feature Support 99 | As of version 21.11, the FIL backend includes support for models with 100 | categorical features (e.g. some 101 | [XGBoost](https://xgboost.readthedocs.io/en/stable/tutorials/categorical.html) and [LightGBM ](https://lightgbm.readthedocs.io/en/latest/Advanced-Topics.html#categorical-feature-support)) models. 102 | These models can be deployed just like any other model, but it is worth 103 | remembering that (as with any other inference pipeline which includes 104 | categorical features), care must be taken to ensure that the categorical 105 | encoding used during inference matches that used during training. If the data 106 | passed through at inference time does not contain all of the categories used 107 | during training, there is no way to reconstruct the correct mapping of 108 | features, so some record must be made of the complete set of categories used 109 | during training. With that record, categorical columns can be appropriately 110 | converted to float32 columns, and submitted to Triton as with any other input. 111 | 112 | For a fully-worked example of using a model with categorical features, check 113 | out the [introductory fraud detection notebook](https://nbviewer.org/github/triton-inference-server/fil_backend/blob/main/notebooks/categorical-fraud-detection/Fraud_Detection_Example.ipynb). 114 | -------------------------------------------------------------------------------- /docs/repo_overview.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Repo Overview 30 | 31 | The FIL backend repo is organized in the following directories: 32 | 33 | ## `ci` 34 | This directory contains scripts and configuration files for working with CI. 35 | Developers may invoke `ci/local/build.sh` to build and run tests locally or 36 | `ci/gitlab/build.sh` to more precisely mirror the test environment run in 37 | official CI. This directory is not intended for end users. 38 | 39 | ## `cmake` 40 | This directory contains CMake files required for the build, especially those 41 | which are used to retrieve external dependencies. It is not intended for 42 | end users. 
43 | 44 | ## `conda` 45 | This directory contains conda-related infrastructure including environment yaml 46 | files used to construct build and test environments: 47 | 48 | - `conda/environments/buildpy.yml`: Minimal environment for using Triton's 49 | `build.py` build script 50 | - `conda/environments/rapids_triton_dev.yml`: Environment for building the FIL 51 | backend 52 | - `conda/environments/triton_benchmark.yml`: Environment for running the FIL 53 | backend's standard benchmarks 54 | - `conda/environments/triton_test_no_client.yml`: Environment for running tests 55 | for the FIL backend. This file does not include Triton's Python client to 56 | facilitate testing on ARM machines, where the client cannot be correctly 57 | installed via pip. 58 | - `conda/environments/triton_test.yml`: Environment for running tests for the 59 | FIL backend that includes Triton's Python client. Recommended environment for 60 | those wishing to run tests outside of Docker. 61 | 62 | ## `docs` 63 | This directory contains markdown files for documentation. 64 | 65 | ## `notebooks` 66 | This directory contains example Jupyter notebooks for using the FIL backend. 67 | 68 | ## `ops` 69 | This directory contains files used for build-related tasks including the 70 | Dockerfile for the FIL backend's dockerized build. It is not intended for end 71 | users. 72 | 73 | ## `qa` 74 | This directory contains files for running tests and benchmarks. It is not 75 | intended for end users. 76 | 77 | ## `scripts` 78 | This directory contains utility scripts for e.g. converting models to Treelite 79 | checkpoint format. It also contains a conda environment file indicating the 80 | necessary dependencies for running these scripts. 81 | 82 | ## `src` 83 | This directory contains the C++ source files for the FIL backend. It is not 84 | intended for end users. 85 | -------------------------------------------------------------------------------- /docs/sklearn_and_cuml.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Scikit-Learn and cuML Support 30 | 31 | **NOTE:** Due to a change in Scikit-Learn 1.2.0, forest models from version 32 | 1.2.0 and later are not currently supported. Support will be added in an 33 | upcoming release of Triton. 34 | 35 | ## Model Serialization 36 | 37 | While LightGBM and XGBoost have their own serialization formats that are 38 | directly supported by the Triton FIL backend, tree models trained with 39 | [Scikit-Learn](https://scikit-learn.org/stable/modules/model_persistence.html) 40 | or [cuML](https://docs.rapids.ai/api/cuml/stable/pickling_cuml_models.html) are 41 | generally serialized using Python's 42 | [pickle](https://docs.python.org/3/library/pickle.html) module. In order to 43 | avoid a round-trip through Python in Triton, the FIL backend instead requires 44 | that these pickled models first be converted to Treelite's binary checkpoint 45 | format. Note that this also allows you to make use of *any* Treelite-supported 46 | model framework in Triton simply by exporting to the binary checkpoint format. 47 | 48 | The FIL backend repo includes scripts for easy conversion from 49 | pickle-serialized cuML or Scikit-Learn models to Treelite checkpoints. 
You can 50 | download the relevant script for Scikit-Learn 51 | [here](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/convert_sklearn.py) 52 | and for cuML 53 | [here](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/convert_cuml.py). 54 | 55 | ## Prerequisites 56 | 57 | To use the Scikit-Learn conversion script, you must run it from within a Python 58 | environment containing both 59 | [Scikit-Learn](https://scikit-learn.org/stable/install.html) and 60 | [Treelite](https://treelite.readthedocs.io/en/latest/install.html). To use the 61 | cuML conversion script, you must run it from within a Python environment 62 | containing [cuML](https://rapids.ai/start.html). 63 | 64 | For convenience, a conda environment config file 65 | [is provided](https://raw.githubusercontent.com/triton-inference-server/fil_backend/main/scripts/environment.yml) 66 | which will install all three of these prerequisites: 67 | 68 | ``` 69 | conda env create -f scripts/environment.yml 70 | conda activate triton_scripts 71 | ``` 72 | 73 | ## Converting to Treelite checkpoints 74 | 75 | **NOTE:** The following steps are **not** necessary for LightGBM or XGBoost 76 | models. The FIL backend supports the native serialization formats for these 77 | frameworks directly. 78 | 79 | If you already have a Scikit-Learn or cuML RF model saved as a pickle file 80 | (`model.pkl`), place it in a directory structure as follows: 81 | 82 | ``` 83 | model_repository/ 84 | `-- fil 85 | |-- 1 86 | | `-- model.pkl 87 | `-- config.pbtxt 88 | ``` 89 | 90 | Then perform the conversion by running either: 91 | ```bash 92 | ./convert_sklearn.py model_repository/fil/1/model.pkl 93 | ``` 94 | for Scikit-Learn models or 95 | ```bash 96 | ./convert_cuml.py model_repository/fil/1/model.pkl 97 | ``` 98 | for cuML models. This will generate a `checkpoint.tl` file in the model 99 | repository in the necessary location. You can then proceed as with any other 100 | model type, setting the `model_type` parameter in `config.pbtxt` to 101 | `"treelite_checkpoint"`. 102 | 103 | Note that Treelite did not guarantee compatibility between minor release 104 | versions for its binary checkpoint model until version 3.0.0 and does not 105 | guarantee compatibility between major releases, so it is recommended that you 106 | keep the original pickle file. If you later make use of a newer version of 107 | Treelite, you can simple re-run the conversion on this pickle file. 108 | -------------------------------------------------------------------------------- /docs/tests.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Running Tests 30 | 31 | For developers working on the FIL backend, the easiest way to run tests is to 32 | invoke the `ci/local/build.sh` script, which will build the server image 33 | and a test image then run a container based on that image which runs the 34 | complete test suite. 35 | 36 | One of the most time-consuming parts of running the test suite is 37 | training the end-to-end test models. The `ci/local/build.sh` script will 38 | cache trained models between runs in `qa/L0_e2e/model_repository` and 39 | `qa/L0_e2e/cpu_model_repository`. Sometimes, you may make a change which 40 | invalidates previously generated models. In such cases, you can clear these 41 | directories in order to start fresh. 
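For example, to discard the cached test models and rebuild them from scratch, you might run something like the following from the repository root (a minimal sketch; the paths are the cache locations mentioned above):

```bash
# Remove cached test models so the next run regenerates them.
rm -rf qa/L0_e2e/model_repository qa/L0_e2e/cpu_model_repository

# Re-run the local build-and-test script.
./ci/local/build.sh
```

The `RETRAIN` variable described below serves a similar purpose without deleting the directories by hand.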
42 | 43 | The `ci/local/build.sh` script uses the following environment variables to 44 | control build and execution of tests: 45 | 46 | - `RETRAIN`: If set to 1, retrain test models. 47 | - `USE_CLIENT_WHEEL`: If set to 1, install the Triton client from a wheel 48 | copied from Triton's SDK image. This is useful for testing on ARM 49 | machines, where the Triton Python client is not available via pip. 50 | - `SDK_IMAGE`: If set, copy the Triton client wheel from this specific Docker 51 | SDK image 52 | - `HOST_BUILD`: Build on the host rather than via Docker. This can be useful 53 | for rapid iteration during development. 54 | - `TEST_PROFILE`: Either "dev" or "ci". This variable supplies the name of the 55 | Hypothesis testing profile to use when running tests. The "ci" profile 56 | runs more examples while the "dev" profile executes more quickly. Default 57 | is "dev". 58 | 59 | ## The CI Test Script 60 | In addition to `ci/local/build.sh`, the repo contains a 61 | `ci/gitlab/build.sh` script which is used to run tests in CI. It is 62 | sometimes useful to invoke this script to more closely replicate the CI 63 | environment. This script does *not* cache models in between runs and will 64 | generally run more and slower tests than those used for the `local` script. 65 | 66 | The `ci/gitlab/build.sh` script uses the following environment variables 67 | to control build and execution of tests: 68 | 69 | - `PREBUILT_SERVER_TAG`: Use this Docker image as the Triton server image 70 | to test rather than building it. 71 | - `PREBUILT_TEST_TAG`: Use this Docker image as the Triton test image rather 72 | than building it on top of the server image. 73 | - `MODEL_BUILDER_IMAGE`: Use this Docker image to train test models rather 74 | than building an image. 75 | - `LOG_DIR`: A host directory used for storing test logs 76 | - `NV_DOCKER_ARGS`: A bash expression that when evaluated returns Docker 77 | arguments used for controlling GPU access in CI 78 | - `BUILDPY`: If set to 1, build with Triton's `build.py` script rather than 79 | the FIL backend Dockerfile. 80 | - `CPU_ONLY`: If set to 1, build without GPU support. 81 | - `NO_CACHE`: Set to 0 to enable Docker cache. **By default, caching is 82 | disabled.** 83 | - `USE_CLIENT_WHEEL`: If set to 1, install the Triton client from a wheel 84 | copied from Triton's SDK image. This is useful for testing on ARM 85 | machines, where the Triton Python client is not available via pip. 86 | - `SDK_IMAGE`: If set, copy the Triton client wheel from this specific Docker 87 | SDK image 88 | 89 | ## Running Tests Manually 90 | It is *strongly* recommended that you use the provided test scripts for running 91 | tests. If you wish to run tests manually, you must generate test models using 92 | the `qa/generate_example_models.sh` script, start the Triton server against 93 | the generated model repository, and then run `pytest --repo qa/L0_e2e/model_repository qa/L0_e2e`. 94 | 95 | This approach is not an officially supported testing method, and minimal 96 | support will be provided for it. If you find it useful and wish to 97 | contribute documentation to make this method easier, pull requests are 98 | welcome. 99 | -------------------------------------------------------------------------------- /docs/workflow.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Development Workflow 30 | The `ci/local/build.sh` script is intended to help automate build and testing 31 | during development. 
Usually it is sufficient to make a change and then run 32 | this script without arguments to validate the change. 33 | 34 | For tasks which require frequent rebuilds, it is sometimes slightly faster 35 | to build on the host rather than in the container. In this case, running 36 | `HOST_BUILD=1 ./ci/local/build.sh` using the [rapids\_triton\_dev conda environment](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml) will perform the build of the FIL backend on the host but then use Docker to execute the tests in a controlled environment. 37 | 38 | For complete information on other options that can be used with this script, 39 | see the [documentation on running tests](https://github.com/triton-inference-server/fil_backend/blob/main/conda/environments/rapids_triton_dev.yml). 40 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # FIL Backend Examples 30 | 31 | This directory contains example notebooks which illustrate typical workflows 32 | and use-cases for the Triton FIL backend. Additional examples will be added to 33 | this directory over time. 34 | 35 | Each subdirectory contains an example notebook and a README with instructions 36 | on how to run the example. 37 | 38 | ## Current Examples 39 | - [Categorical Fraud 40 | Example](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection): 41 | This introductory example walks through training a categorical XGBoost model for fraud 42 | detection and deploying it on both GPU-accelerated and CPU-only systems. 43 | - [FAQ 44 | Notebook](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/faq): 45 | This notebook answers a series of frequently asked questions around the FIL 46 | backend for Triton and offers example code with practical applications of 47 | those answers. 48 | 49 | ## Deprecated Examples 50 | - [Simple 51 | XGBoost](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/simple-xgboost): 52 | This example has been superseded by the Categorical Fraud Example, which 53 | offers a more succinct and up-to-date example of how to train and deploy an 54 | XGBoost model. 55 | -------------------------------------------------------------------------------- /notebooks/categorical-fraud-detection/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Fraud Detection With Categorical XGBoost 30 | 31 | This example notebook shows how to train and deploy an XGBoost model 32 | with categorical features in Triton using the FIL backend. We begin by training 33 | two separate models on a fraud detection dataset with categorical variables: 34 | one small model designed to maximize runtime performance and one larger model 35 | designed to maximize accurate and precise detection of fraud. We then deploy 36 | both models on CPU and GPU and compare their performance using Triton's 37 | `perf_analyzer`. Based on these results, we see that GPU deployment opens up 38 | the possibility of deploying a much larger and more accurate fraud model with 39 | higher throughput while also keeping to a tight latency budget. 40 | 41 | ## Running the notebook 42 | In order to launch the Triton server, you will need 43 | [Docker](https://docs.docker.com/get-docker/) installed on your system. 
The 44 | rest of the notebook also requires a few Python dependencies. To easily install 45 | these additional dependencies, you may make use of the provided conda 46 | [environment 47 | file](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection/environment.yml) 48 | as follows: 49 | ```bash 50 | conda env create -f environment.yml 51 | ``` 52 | You may then activate the conda environment and run the notebook as usual: 53 | ```bash 54 | conda activate triton_example 55 | jupyter notebook 56 | ``` 57 | The Jupyter interface should now be accessible from a browser, and you can 58 | follow the instructions within the notebook itself from there. 59 | -------------------------------------------------------------------------------- /notebooks/categorical-fraud-detection/environment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_example 3 | channels: 4 | - conda-forge 5 | - nvidia 6 | - rapidsai 7 | dependencies: 8 | - cudatoolkit=12.8 9 | - cudf=25.04 10 | - cuml=25.04 11 | - cupy 12 | - jupyter 13 | - kaggle 14 | - matplotlib 15 | - numpy 16 | - pandas 17 | - pip 18 | - python 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - pip: 22 | - tritonclient[all] 23 | - protobuf 24 | -------------------------------------------------------------------------------- /notebooks/faq/README.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # FAQs and Advanced Features 30 | 31 | Designed as a complete reference to features of the FIL backend and common 32 | tasks performed with it, this notebook provides answers to a series of FAQs 33 | along with code snippets demonstrating how to make practical use of those 34 | answers. 35 | 36 | If you have never made use of the FIL backend before, it is recommended that 37 | you begin with the introductory [fraud detection notebook](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection#fraud-detection-with-categorical-xgboost). After working through this basic example, the FAQs notebook will offer answers to questions that go beyond the basics in order to get the most out of the FIL backend. 38 | 39 | ## Running the notebook 40 | In order to launch the Triton server, you will need 41 | [Docker](https://docs.docker.com/get-docker/) installed on your system. The 42 | rest of the notebook also requires a few Python dependencies. To easily install 43 | these additional dependencies, you may make use of the provided conda 44 | [environment 45 | file](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/faq/environment.yml) 46 | as follows: 47 | ```bash 48 | conda env create -f environment.yml 49 | ``` 50 | You may then activate the conda environment and run the notebook as usual: 51 | ```bash 52 | conda activate triton_faq_nb 53 | jupyter notebook 54 | ``` 55 | The Jupyter interface should now be accessible from a browser, and you can 56 | follow the instructions within the notebook itself from there. 57 | 58 | Note that depending on which model framework you choose to use with this 59 | notebook, you may not need all the dependencies listed in the conda environment 60 | file. Remove any that you do not wish to install before installing the 61 | environment. 
62 | -------------------------------------------------------------------------------- /notebooks/faq/environment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: triton_faq_nb 3 | channels: 4 | - conda-forge 5 | - nvidia 6 | - rapidsai 7 | dependencies: 8 | - cudatoolkit=12.8 9 | - cuml=25.04 10 | - joblib 11 | - jupyter 12 | - lightgbm 13 | - numpy 14 | - pandas 15 | - pip 16 | - python 17 | - skl2onnx 18 | - treelite>=4.4 19 | - rapidsai::xgboost>=2.1 20 | - scikit-learn>=1.5 21 | - pip: 22 | - protobuf 23 | - tritonclient[all] 24 | -------------------------------------------------------------------------------- /notebooks/simple-xgboost/README.md: -------------------------------------------------------------------------------- 1 | # (DEPRECATED) Triton FIL backend with XGBoost 2 | 3 | **THIS NOTEBOOK HAS BEEN DEPRECATED. FOR A SIMPLE AND CONCISE INTRODUCTION TO TRAINING AND DEPLOYING AN XGBOOST MODEL WITH THE FIL BACKEND, PLEASE SEE THE [CATEGORICAL FRAUD DETECTION](https://github.com/triton-inference-server/fil_backend/tree/main/notebooks/categorical-fraud-detection) EXAMPLE NOTEBOOK.** 4 | 5 | This notebook will eventually be reworked, split into smaller parts, and reintroduced for a later release. It is left here for historical reference, but some cells are known not to work with the latest versions of various Triton components. 6 | 7 | This notebook is a reference for deploying an XGBoost model on Triton with the FIL backend. The notebook explains how one can deploy XGBoost model in Triton, check deployment status and send inference requests, set concurrent model execution and dynamic batching and find the best deployment configuration using Model Analyzer. 8 | 9 | ## Requirements 10 | * NVIDIA GPU (Pascal+ required, recommended GPUs: T4, V100 or A100) 11 | * [Latest NVIDIA driver](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html) 12 | * [Docker](https://docs.docker.com/get-docker/) 13 | * [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) 14 | 15 | ## Run the Triton Inference Server container 16 | 17 | **Note:** Due to a bug in release 21.07, Triton's `model_analyzer` cannot be used with the FIL backend. If you wish to use the model analyzer, please use release 21.08 or later. 18 | 19 | Before running the container, clone the repository and then run the container: 20 | 21 | ``` 22 | git clone https://github.com/triton-inference-server/fil_backend.git 23 | cd fil_backend 24 | 25 | docker run \ 26 | -it \ 27 | --gpus=all \ 28 | --rm \ 29 | --net=host \ 30 | --name triton_fil \ 31 | nvcr.io/nvidia/tritonserver: # Put the appropriate tag here. 32 | ``` 33 | 34 | **Note:** The artifacts created by scripts inside the container are created with root permission. The user on host machine might not be able to modify the artifacts once the container exists. To avoid this issue, copy the notebook `docker cp simple_xgboost_example.ipynb ` and create the artifacts inside the container. 
35 | 36 | Now open up another terminal and copy the notebook from host into the container as follows: 37 | ``` 38 | docker cp notebooks/ triton_fil:/ 39 | ``` 40 | 41 | ## Starting Jupyter notebook 42 | In the previous terminal perform the following steps: 43 | 44 | ### Install Jupyter notebook inside the Triton container 45 | ``` 46 | pip3 install jupyter 47 | ``` 48 | ### Run Jupyter notebook inside the Triton container 49 | Change directory to `/notebooks` folder and run the jupyter notebook: 50 | ``` 51 | cd /notebooks 52 | jupyter notebook --allow-root --no-browser --port 7001 53 | ``` 54 | 55 | -------------------------------------------------------------------------------- /ops/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.3 2 | ########################################################################################### 3 | # Arguments for controlling build details 4 | ########################################################################################### 5 | # Version of Triton to use 6 | ARG TRITON_VERSION=25.05 7 | # Base container image 8 | ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 9 | # Whether or not to enable GPU build 10 | ARG TRITON_ENABLE_GPU=ON 11 | # A Triton server image to use as base for test layers (skip actual build) 12 | ARG SERVER_IMAGE=build-stage 13 | # Whether or not to install Triton client from wheel in SDK image 14 | ARG USE_CLIENT_WHEEL=0 15 | # SDK container image (only used if USE_CLIENT_WHEEL==1) 16 | ARG SDK_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-sdk 17 | # Whether or not to use backend library prebuilt on host 18 | ARG USE_HOST_LIB=0 19 | 20 | FROM condaforge/miniforge3 as conda-base 21 | COPY ./ops/gpuci_conda_retry /usr/bin/gpuci_conda_retry 22 | COPY ./ops/gpuci_mamba_retry /usr/bin/gpuci_mamba_retry 23 | RUN chmod +x /usr/bin/gpuci_conda_retry /usr/bin/gpuci_mamba_retry 24 | 25 | RUN mkdir /conda 26 | RUN gpuci_mamba_retry install -c conda-forge conda-pack=0.7 27 | 28 | FROM conda-base as conda-dev 29 | COPY ./conda/environments/rapids_triton_dev.yml /conda/environment.yml 30 | RUN gpuci_mamba_retry create -n rapids_triton_dev \ 31 | && gpuci_mamba_retry env update -n rapids_triton_dev -f /conda/environment.yml \ 32 | && rm /conda/environment.yml 33 | RUN conda-pack -n rapids_triton_dev -o /tmp/env.tar \ 34 | && mkdir /conda/dev/ \ 35 | && cd /conda/dev/ \ 36 | && tar xf /tmp/env.tar \ 37 | && rm /tmp/env.tar 38 | RUN /conda/dev/bin/conda-unpack 39 | 40 | # Stage for installing test dependencies 41 | FROM conda-base as base-test-install 42 | COPY ./conda/environments/triton_test_no_client.yml /environment.yml 43 | 44 | RUN gpuci_mamba_retry create -n triton_test \ 45 | && gpuci_mamba_retry env update -n triton_test -f /environment.yml \ 46 | && rm /environment.yml 47 | 48 | FROM base-test-install as wheel-install-0 49 | RUN apt-get update \ 50 | && apt-get install --no-install-recommends -y \ 51 | build-essential \ 52 | ca-certificates \ 53 | git \ 54 | && apt-get clean \ 55 | && rm -rf /var/lib/apt/lists/* \ 56 | && conda run --no-capture-output -n triton_test pip install tritonclient[all] 57 | 58 | FROM ${SDK_IMAGE} as sdk-image 59 | 60 | FROM base-test-install as wheel-install-1 61 | COPY --from=sdk-image /workspace/install/python /sdk_install 62 | RUN conda run --no-capture-output -n triton_test \ 63 | pip install /sdk_install/tritonclient*manylinux*.whl \ 64 | && rm -r /sdk_install 65 | 66 | FROM 
wheel-install-${USE_CLIENT_WHEEL} as conda-test 67 | RUN conda run --no-capture-output -n triton_test \ 68 | pip install git+https://github.com/rapidsai/rapids-triton.git@branch-25.06#subdirectory=python 69 | RUN conda-pack --ignore-missing-files -n triton_test -o /tmp/env.tar \ 70 | && mkdir /conda/test/ \ 71 | && cd /conda/test/ \ 72 | && tar xf /tmp/env.tar \ 73 | && rm /tmp/env.tar 74 | RUN /conda/test/bin/conda-unpack 75 | 76 | 77 | FROM ${BASE_IMAGE} as base 78 | 79 | ENV PATH="/root/miniconda3/bin:${PATH}" 80 | 81 | # In CI, CPU base image may not have curl, but it also does not need to update 82 | # the cuda keys 83 | RUN if command -v curl; \ 84 | then [ $(uname -m) = 'x86_64' ] \ 85 | && curl -L -o /tmp/cuda-keyring.deb \ 86 | https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ 87 | || curl -L -o /tmp/cuda-keyring.deb \ 88 | https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-keyring_1.0-1_all.deb; \ 89 | dpkg -i /tmp/cuda-keyring.deb \ 90 | && rm /tmp/cuda-keyring.deb; fi 91 | 92 | RUN apt-get update \ 93 | && apt-get install --no-install-recommends -y \ 94 | build-essential \ 95 | ca-certificates \ 96 | git \ 97 | && apt-get clean \ 98 | && rm -rf /var/lib/apt/lists/* 99 | 100 | # Stage immediately before building; useful for build iteration 101 | FROM base as build-prep 102 | 103 | RUN mkdir -p /rapids_triton/build /rapids_triton/install 104 | 105 | COPY ./src /rapids_triton/src 106 | COPY ./CMakeLists.txt /rapids_triton 107 | COPY ./cmake /rapids_triton/cmake 108 | 109 | ARG BACKEND_NAME=fil 110 | ENV BACKEND_NAME=$BACKEND_NAME 111 | 112 | WORKDIR /rapids_triton/build 113 | 114 | # Remove potentially stale build artifacts 115 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 116 | then \ 117 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 118 | else \ 119 | mkdir -p /opt/tritonserver/backends/${BACKEND_NAME}; \ 120 | fi 121 | 122 | # Stage where build actually takes place 123 | FROM build-prep as build-stage 124 | 125 | ARG TRITON_VERSION 126 | ENV TRITON_VERSION=$TRITON_VERSION 127 | 128 | ARG BUILD_TYPE=Release 129 | ENV BUILD_TYPE=$BUILD_TYPE 130 | ARG BUILD_TESTS 131 | ENV BUILD_TESTS=$BUILD_TESTS 132 | ARG BUILD_EXAMPLE 133 | ENV BUILD_EXAMPLE=$BUILD_EXAMPLE 134 | 135 | ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server 136 | ENV TRITON_REPO_ORGANIZATION=$TRITON_REPO_ORGANIZATION 137 | ARG TRITON_CORE_REPO_TAG=r${TRITON_VERSION} 138 | ENV TRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG 139 | ARG TRITON_COMMON_REPO_TAG=r${TRITON_VERSION} 140 | ENV TRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG 141 | ARG TRITON_BACKEND_REPO_TAG=r${TRITON_VERSION} 142 | ENV TRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG 143 | ARG RAPIDS_TRITON_REPO_TAG=main 144 | ENV RAPIDS_TRITON_REPO_TAG=$RAPIDS_TRITON_REPO_TAG 145 | ARG RAPIDS_TRITON_REPO_PATH=https://github.com/rapidsai/rapids-triton.git 146 | ENV RAPIDS_TRITON_REPO_PATH=$RAPIDS_TRITON_REPO_PATH 147 | 148 | ARG TRITON_ENABLE_GPU=ON 149 | ENV TRITON_ENABLE_GPU=$TRITON_ENABLE_GPU 150 | ARG TRITON_ENABLE_STATS=ON 151 | ENV TRITON_ENABLE_GPU=$TRITON_ENABLE_GPU 152 | 153 | # Specify *minimum* version for all RAPIDS dependencies 154 | # Some RAPIDS deps may have later versions 155 | ARG RAPIDS_DEPENDENCIES_VERSION=25.06 156 | ENV RAPIDS_DEPENDENCIES_VERSION=$RAPIDS_DEPENDENCIES_VERSION 157 | 158 | ARG TRITON_FIL_USE_TREELITE_STATIC=ON 159 | ENV TRITON_FIL_USE_TREELITE_STATIC=$TRITON_FIL_USE_TREELITE_STATIC 160 | 161 | COPY 
--from=conda-dev /conda/dev /conda/dev 162 | 163 | SHELL ["/bin/bash", "-c"] 164 | 165 | RUN source /conda/dev/bin/activate \ 166 | && cmake \ 167 | --log-level=VERBOSE \ 168 | -GNinja \ 169 | -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \ 170 | -DBUILD_TESTS="${BUILD_TESTS}" \ 171 | -DTRITON_REPO_ORGANIZATION="${TRITON_REPO_ORGANIZATION}" \ 172 | -DTRITON_CORE_REPO_TAG="${TRITON_CORE_REPO_TAG}" \ 173 | -DTRITON_COMMON_REPO_TAG="${TRITON_COMMON_REPO_TAG}" \ 174 | -DTRITON_BACKEND_REPO_TAG="${TRITON_BACKEND_REPO_TAG}" \ 175 | -DRAPIDS_TRITON_REPO_TAG="${RAPIDS_TRITON_REPO_TAG}" \ 176 | -DRAPIDS_TRITON_REPO_PATH="${RAPIDS_TRITON_REPO_PATH}" \ 177 | -DTRITON_ENABLE_GPU="${TRITON_ENABLE_GPU}" \ 178 | -DTRITON_ENABLE_STATS="${TRITON_ENABLE_STATS}" \ 179 | -DRAPIDS_DEPENDENCIES_VERSION="${RAPIDS_DEPENDENCIES_VERSION}" \ 180 | -DTRITON_FIL_USE_TREELITE_STATIC="${TRITON_FIL_USE_TREELITE_STATIC}" \ 181 | -DCMAKE_INSTALL_PREFIX=/rapids_triton/install \ 182 | ..; 183 | 184 | ENV CCACHE_DIR=/ccache 185 | 186 | ARG CCACHE_REMOTE_STORAGE 187 | 188 | RUN --mount=type=cache,target=/ccache/ source /conda/dev/bin/activate && \ 189 | if [ -n "${CCACHE_REMOTE_STORAGE}" ] && which ccache ; then \ 190 | ccache --set-config=remote_only=true ; \ 191 | ccache --set-config=remote_storage=${CCACHE_REMOTE_STORAGE} ; \ 192 | ccache -p ; \ 193 | fi && \ 194 | ninja install 195 | 196 | # Stage for generating testing image 197 | FROM ${SERVER_IMAGE} as test-host-0 198 | FROM ${SERVER_IMAGE} as test-host-1 199 | 200 | ARG BACKEND_NAME=fil 201 | ENV BACKEND_NAME=$BACKEND_NAME 202 | 203 | # Remove existing FIL backend install 204 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 205 | then \ 206 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 207 | fi 208 | COPY ./install/backends/fil /opt/tritonserver/backends/${BACKEND_NAME} 209 | 210 | FROM test-host-${USE_HOST_LIB} as test-build 211 | 212 | FROM ${SERVER_IMAGE} as test-stage 213 | ARG BACKEND_NAME=fil 214 | ENV BACKEND_NAME=$BACKEND_NAME 215 | 216 | COPY --from=conda-test /conda/test /conda/test 217 | 218 | # Remove existing FIL backend install 219 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 220 | then \ 221 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 222 | fi 223 | COPY --from=test-build \ 224 | /opt/tritonserver/backends/$BACKEND_NAME \ 225 | /opt/tritonserver/backends/$BACKEND_NAME 226 | 227 | COPY qa /qa 228 | COPY scripts /scripts 229 | 230 | ENTRYPOINT [] 231 | CMD ["/bin/bash", "-c", "source /conda/test/bin/activate && /qa/entrypoint.sh"] 232 | 233 | FROM ${BASE_IMAGE} as final 234 | 235 | ARG BACKEND_NAME=fil 236 | ENV BACKEND_NAME=$BACKEND_NAME 237 | 238 | RUN mkdir /models 239 | 240 | # Remove existing FIL backend install 241 | RUN if [ -d /opt/tritonserver/backends/${BACKEND_NAME} ]; \ 242 | then \ 243 | rm -rf /opt/tritonserver/backends/${BACKEND_NAME}/*; \ 244 | fi 245 | 246 | COPY --from=build-stage \ 247 | /opt/tritonserver/backends/$BACKEND_NAME \ 248 | /opt/tritonserver/backends/$BACKEND_NAME 249 | -------------------------------------------------------------------------------- /ops/E2E.md: -------------------------------------------------------------------------------- 1 | 28 | 29 | # Customized End-to-End Builds 30 | 31 | **This build option has been removed in version 21.11. It may be re-introduced 32 | at a later date. 
Please file an issue if you have need for greater build 33 | customization than is provided by standard build options.** 34 | -------------------------------------------------------------------------------- /ops/gpuci_conda_retry: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script taken from the RAPIDS gpuci_tools repo: 3 | # https://github.com/rapidsai/gpuci-tools/blob/215d652dbef6f35c13597812f38058e82520b8e5/tools/gpuci_conda_retry 4 | # 5 | # gpuci_conda_retry 6 | # 7 | # wrapper for conda that retries the command after a CondaHTTPError, 8 | # ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that 9 | # is normally resolved by retrying) 10 | # 11 | # This must be set in order for the script to recognize failing exit codes when 12 | # output is piped to tee 13 | # 14 | # Example usage: 15 | # $ gpuci_conda_retry install cudatoolkit=11.0 rapids=0.16 16 | # 17 | # Configurable options are set using the following env vars: 18 | # 19 | # GPUCI_CONDA_RETRY_MAX - set to a positive integer to set the max number of retry 20 | # attempts (attempts after the initial try). 21 | # Default is 3 retries 22 | # 23 | # GPUCI_CONDA_RETRY_SLEEP - set to a positive integer to set the duration, in 24 | # seconds, to wait between retries. 25 | # Default is a 10 second sleep 26 | # 27 | set -o pipefail 28 | 29 | condaretry_help=" 30 | gpuci_conda_retry options: 31 | 32 | --condaretry_max_retries=n Retry the conda command at most n times (default is 3) 33 | --condaretry_sleep_interval=n Sleep n seconds between retries (default is 5) 34 | 35 | ALSO gpuci_conda_retry options can be set using the following env vars: 36 | 37 | GPUCI_CONDA_RETRY_MAX - set to a positive integer to set the max number of retry 38 | attempts (attempts after the initial try). 39 | Default is 3 retries 40 | 41 | GPUCI_CONDA_RETRY_SLEEP - set to a positive integer to set the duration, in 42 | seconds, to wait between retries. 43 | Default is a 10 second sleep 44 | ========== 45 | " 46 | max_retries=${GPUCI_CONDA_RETRY_MAX:=3} 47 | sleep_interval=${GPUCI_CONDA_RETRY_SLEEP:=10} 48 | exitcode=0 49 | needToRetry=0 50 | retries=0 51 | args="" 52 | 53 | # Temporarily set this to something else (eg. a script called "testConda" that 54 | # prints "CondaHTTPError:" and exits with 1) for testing this script. 55 | #condaCmd=./testConda 56 | condaCmd=${CONDA_EXE:=conda} 57 | 58 | # Function to output messages to stderr 59 | # FIXME - extend `gpuci_logger` or make another script for this 60 | function echo_stderr { 61 | echo " [gpuci_conda_retry] $@" >&2 62 | } 63 | 64 | # Function to run conda and check output for specific retryable errors 65 | # input variables: 66 | # condaCmd: the command used for running conda, which accepts the args 67 | # passed to this script 68 | # outfile: file to tee output to for checking, likely a temp file 69 | # output variables: 70 | # exitcode: the exit code from running ${condaCmd} ${args} 71 | # needToRetry: 1 if the command should be retried, 0 if it should not be 72 | function runConda { 73 | ${condaCmd} ${args} 2>&1| tee ${outfile} 74 | exitcode=$? 75 | needToRetry=0 76 | retryingMsg="" 77 | 78 | if (( ${exitcode} != 0 )); then 79 | # Show exit code 80 | echo_stderr "conda returned exit code: ${exitcode}" 81 | 82 | if grep -q CondaHTTPError: ${outfile}; then 83 | retryingMsg="Retrying, found 'CondaHTTPError:' in output..." 
84 | needToRetry=1 85 | elif grep -q ChecksumMismatchError: ${outfile}; then 86 | retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..." 87 | needToRetry=1 88 | elif grep -q JSONDecodeError: ${outfile}; then 89 | retryingMsg="Retrying, found 'JSONDecodeError:' in output..." 90 | needToRetry=1 91 | elif grep -q ChunkedEncodingError: ${outfile}; then 92 | retryingMsg="Retrying, found 'ChunkedEncodingError:' in output..." 93 | needToRetry=1 94 | elif grep -q CondaMultiError: ${outfile}; then 95 | retryingMsg="Retrying, found 'CondaMultiError:' in output..." 96 | needToRetry=1 97 | elif grep -q EOFError: ${outfile}; then 98 | retryingMsg="Retrying, found 'EOFError:' in output..." 99 | needToRetry=1 100 | else 101 | echo_stderr "Exiting, no retryable conda errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:' or 'ChunkedEncodingError:' or 'CondaMultiError:' or 'EOFError:'" 102 | fi 103 | 104 | if (( ${needToRetry} == 1 )) && \ 105 | (( ${retries} >= ${max_retries} )); then 106 | # Catch instance where we run out of retries 107 | echo_stderr "Exiting, reached max retries..." 108 | else 109 | # Give reason for retry 110 | echo_stderr $retryingMsg 111 | fi 112 | fi 113 | } 114 | 115 | 116 | # Process and remove args recognized only by this script, save others for conda 117 | # Process help separately 118 | for arg in $*; do 119 | opt=${arg%%=*} 120 | val=${arg##*=} 121 | if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then 122 | echo "${condaretry_help}" 123 | ${condaCmd} --help 124 | exit $? 125 | elif [[ ${opt} == "--condaretry_max_retries" ]]; then 126 | max_retries=${val} 127 | elif [[ ${opt} == "--condaretry_sleep_interval" ]]; then 128 | sleep_interval=${val} 129 | else 130 | args="${args} ${arg}" 131 | fi 132 | done 133 | 134 | # Run command 135 | outfile=$(mktemp) 136 | runConda ${args} 137 | 138 | # Retry loop, only if needed 139 | while (( ${needToRetry} == 1 )) && \ 140 | (( ${retries} < ${max_retries} )); do 141 | 142 | retries=$(expr ${retries} + 1) 143 | echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." 144 | sleep ${sleep_interval} 145 | echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." 146 | 147 | runConda ${args} 148 | done 149 | 150 | rm -f ${outfile} 151 | exit ${exitcode} 152 | -------------------------------------------------------------------------------- /ops/gpuci_mamba_retry: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script taken from the RAPIDS gpuci_tools repo: 3 | # https://github.com/rapidsai/gpuci-tools/blob/215d652dbef6f35c13597812f38058e82520b8e5/tools/gpuci_mamba_retry 4 | # 5 | # gpuci_mamba_retry 6 | # 7 | # Wrapper for conda that retries the command after a CondaHTTPError, 8 | # ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that 9 | # is normally resolved by retrying) 10 | # 11 | # This must be set in order for the script to recognize failing exit codes when 12 | # output is piped to tee 13 | # 14 | # Example usage: 15 | # $ gpuci_mamba_retry install cudatoolkit=11.0 rapids=0.16 16 | # 17 | # Configurable options are set using the following env vars: 18 | # 19 | # GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry 20 | # attempts (attempts after the initial try). 21 | # Default is 3 retries 22 | # 23 | # GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in 24 | # seconds, to wait between retries. 
25 | # Default is a 10 second sleep 26 | # 27 | set -o pipefail 28 | 29 | mambaretry_help=" 30 | gpuci_mamba_retry options: 31 | 32 | --mambaretry_max_retries=n Retry the conda command at most n times (default is 3) 33 | --mambaretry_sleep_interval=n Sleep n seconds between retries (default is 5) 34 | 35 | ALSO gpuci_mamba_retry options can be set using the following env vars: 36 | 37 | GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry 38 | attempts (attempts after the initial try). 39 | Default is 3 retries 40 | 41 | GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in 42 | seconds, to wait between retries. 43 | Default is a 10 second sleep 44 | ========== 45 | " 46 | max_retries=${GPUCI_MAMBA_RETRY_MAX:=3} 47 | sleep_interval=${GPUCI_MAMBA_RETRY_SLEEP:=10} 48 | exitcode=0 49 | needToRetry=0 50 | retries=0 51 | args="" 52 | 53 | # Temporarily set this to something else (eg. a script called "testConda" that 54 | # prints "CondaHTTPError:" and exits with 1) for testing this script. 55 | #mambaCmd=./testConda 56 | mambaCmd=${MAMBA_BIN:=mamba} 57 | 58 | # Function to output messages to stderr 59 | # FIXME - extend `gpuci_logger` or make another script for this 60 | function echo_stderr { 61 | echo " [gpuci_mamba_retry] $@" >&2 62 | } 63 | 64 | # Function to run conda and check output for specific retryable errors 65 | # input variables: 66 | # mambaCmd: the command used for running conda, which accepts the args 67 | # passed to this script 68 | # outfile: file to tee output to for checking, likely a temp file 69 | # output variables: 70 | # exitcode: the exit code from running ${mambaCmd} ${args} 71 | # needToRetry: 1 if the command should be retried, 0 if it should not be 72 | function runMamba { 73 | ${mambaCmd} ${args} 2>&1| tee ${outfile} 74 | exitcode=$? 75 | needToRetry=0 76 | retryingMsg="" 77 | 78 | if (( ${exitcode} != 0 )); then 79 | # Show exit code 80 | echo_stderr "Failed, mamba returned exit code: ${exitcode}" 81 | 82 | if grep -q CondaHTTPError: ${outfile}; then 83 | retryingMsg="Retrying, found 'CondaHTTPError:' in output..." 84 | needToRetry=1 85 | elif grep -q ChecksumMismatchError: ${outfile}; then 86 | retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..." 87 | needToRetry=1 88 | elif grep -q JSONDecodeError: ${outfile}; then 89 | retryingMsg="Retrying, found 'JSONDecodeError:' in output..." 90 | needToRetry=1 91 | elif grep -q EOFError: ${outfile}; then 92 | retryingMsg="Retrying, found 'EOFError:' in output..." 93 | needToRetry=1 94 | else 95 | echo_stderr "Exiting, no retryable mamba errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:' or 'EOFError:'" 96 | fi 97 | 98 | if (( ${needToRetry} == 1 )) && \ 99 | (( ${retries} >= ${max_retries} )); then 100 | # Catch instance where we run out of retries 101 | echo_stderr "Exiting, reached max retries..." 102 | else 103 | # Give reason for retry 104 | echo_stderr $retryingMsg 105 | fi 106 | fi 107 | } 108 | 109 | 110 | # Process and remove args recognized only by this script, save others for conda 111 | # Process help separately 112 | for arg in $*; do 113 | opt=${arg%%=*} 114 | val=${arg##*=} 115 | if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then 116 | echo "${mambaretry_help}" 117 | ${mambaCmd} --help 118 | exit $? 
119 | elif [[ ${opt} == "--mambaretry_max_retries" ]]; then 120 | max_retries=${val} 121 | elif [[ ${opt} == "--mambaretry_sleep_interval" ]]; then 122 | sleep_interval=${val} 123 | else 124 | args="${args} ${arg}" 125 | fi 126 | done 127 | 128 | # Run command 129 | outfile=$(mktemp) 130 | runMamba ${args} 131 | 132 | # Retry loop, only if needed 133 | while (( ${needToRetry} == 1 )) && \ 134 | (( ${retries} < ${max_retries} )); do 135 | 136 | retries=$(expr ${retries} + 1) 137 | echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." 138 | sleep ${sleep_interval} 139 | echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." 140 | 141 | runMamba ${args} 142 | done 143 | 144 | rm -f ${outfile} 145 | exit ${exitcode} 146 | -------------------------------------------------------------------------------- /ops/move_deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import shutil 6 | import subprocess 7 | from pathlib import Path 8 | 9 | MISSING_REGEX = re.compile(r"\n\t(.+)\ =>\ not\ found") 10 | FOUND_REGEX = re.compile(r"\n\t(.+)\ =>\ (.+)\ (\(0[xX][0-9a-fA-F]+\))") 11 | 12 | 13 | def ldd(path): 14 | """Get output of ldd for given file""" 15 | ldd_out = subprocess.run(["ldd", path], check=True, capture_output=True, text=True) 16 | return ldd_out.stdout 17 | 18 | 19 | def get_missing_deps(ldd_output): 20 | """Return iterator of missing dependencies in ldd output""" 21 | for match in MISSING_REGEX.finditer(ldd_output): 22 | yield match.group(1) 23 | 24 | 25 | def path_contains(parent, child): 26 | """Check if first path contains the child path""" 27 | parent = os.path.abspath(parent) 28 | child = os.path.abspath(child) 29 | return parent == os.path.commonpath([parent, child]) 30 | 31 | 32 | def get_deps_map(ldd_output, required_dir=None): 33 | """Return dictionary mapping library names to paths""" 34 | deps_map = {} 35 | for match in FOUND_REGEX.finditer(ldd_output): 36 | if required_dir is None or path_contains(required_dir, match.group(2)): 37 | deps_map[match.group(1)] = match.group(2) 38 | return deps_map 39 | 40 | 41 | def move_dependencies(): 42 | """Move FIL backend dependencies from conda build environment to install 43 | directory 44 | 45 | The FIL backend library is built within a a conda environment containing 46 | all required shared libraries for deploying the backend. This function 47 | analyzes ldd output to determine what libraries FIL links against in its 48 | build environment as well as what libraries will be missing in the final 49 | install location. It then moves missing libraries to the final install 50 | location and repeats the analysis until it has satisfied as many missing 51 | dependencies as possible. 
52 | """ 53 | fil_lib = os.getenv("FIL_LIB", "libtriton_fil.so") 54 | lib_dir = os.getenv("LIB_DIR", "/usr/lib") 55 | 56 | conda_lib_dir = os.getenv("CONDA_LIB_DIR") 57 | if conda_lib_dir is None: 58 | conda_prefix = os.getenv("CONDA_PREFIX") 59 | if conda_prefix is None: 60 | raise RuntimeError( 61 | "Must set CONDA_LIB_DIR to conda environment lib directory" 62 | ) 63 | conda_lib_dir = os.path.join(conda_prefix, "lib") 64 | 65 | Path(lib_dir).mkdir(parents=True, exist_ok=True) 66 | 67 | # Set RUNPATH to conda lib directory to determine locations of 68 | # conda-provided dependencies 69 | subprocess.run(["patchelf", "--set-rpath", conda_lib_dir, fil_lib], check=True) 70 | 71 | ldd_out = ldd(fil_lib) 72 | expected_missing = set(get_missing_deps(ldd_out)) 73 | deps_map = get_deps_map(ldd_out, required_dir=conda_lib_dir) 74 | 75 | # Set RUNPATH to final dependency directory 76 | subprocess.run(["patchelf", "--set-rpath", lib_dir, fil_lib], check=True) 77 | 78 | prev_missing = { 79 | None, 80 | } 81 | cur_missing = set() 82 | while prev_missing != cur_missing: 83 | prev_missing = cur_missing 84 | cur_missing = set(get_missing_deps(ldd(fil_lib))) 85 | for missing_dep in cur_missing: 86 | try: 87 | lib_path = deps_map[missing_dep] 88 | except KeyError: 89 | continue 90 | shutil.copy(lib_path, lib_dir) 91 | 92 | remaining = cur_missing - expected_missing 93 | if remaining != {}: 94 | print("Could not find the following dependencies:") 95 | for lib in sorted(remaining): 96 | print(lib) 97 | else: 98 | print("All dependencies found") 99 | 100 | 101 | if __name__ == "__main__": 102 | move_dependencies() 103 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | [tool.codespell] 28 | # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - 29 | # this is only to allow you to run codespell interactively 30 | skip = "./.git,./.github" 31 | # ignore short words, and typename parameters like OffsetT 32 | ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" 33 | # use the 'clear' dictionary for unambiguous spelling mistakes 34 | builtin = "clear" 35 | # disable warnings about binary files and wrong encoding 36 | quiet-level = 3 37 | 38 | [tool.isort] 39 | profile = "black" 40 | use_parentheses = true 41 | multi_line_output = 3 42 | include_trailing_comma = true 43 | force_grid_wrap = 0 44 | ensure_newline_before_comments = true 45 | line_length = 88 46 | balanced_wrapping = true 47 | indent = " " 48 | skip = ["build"] 49 | 50 | -------------------------------------------------------------------------------- /qa/BENCHMARKS.md: -------------------------------------------------------------------------------- 1 | # FIL Backend Benchmarks 2 | 3 | **WARNING: The models which were used for this benchmarking script have 4 | temporarily been removed. They will be restored using a storage solution other 5 | than Git LFS at a later date.** 6 | 7 | In order to facilitate performance analysis during development of the FIL 8 | backend, the `qa/run_benchmarks.sh` scripts can run a simple set of benchmarks 9 | against standard models. To run this script, first install the benchmarking 10 | conda environment: 11 | ```bash 12 | conda env create -f conda/environments/triton_benchmark.yml 13 | ``` 14 | 15 | Next, start the Triton server with the provided benchmark models. Note that you 16 | will need [git lfs](https://git-lfs.github.com/) to checkout these models. You 17 | may start the server by running the following command from the repo root: 18 | 19 | ```bash 20 | docker run \ 21 | --rm \ 22 | --gpus=all \ 23 | --name benchmark_server \ 24 | -p 8000:8000 \ 25 | -p 8001:8001 \ 26 | -p 8002:8002 \ 27 | -v $PWD/qa/benchmark_repo:/models \ 28 | triton_fil \ 29 | tritonserver \ 30 | --model-repository=/models 31 | ``` 32 | 33 | Here, `triton_fil` is used as the Docker image, since this is the standard tag 34 | used during development, but you may run the benchmarks against any Triton 35 | image which contains the FIL backend. 36 | 37 | In a separate terminal, you may now invoke the benchmark script itself as 38 | follows: 39 | ```bash 40 | conda activate triton_benchmark 41 | ./qa/run_benchmarks.sh 42 | ``` 43 | 44 | The benchmark script will provide output in the `qa/benchmark_output` 45 | directory. Each model tested will have its own directory with `.csv` files 46 | representing results for various batch sizes. The `summary` directory will also 47 | contain a `.csv` collating the data from each run as well as a `.png` showing 48 | throughput vs. p99 latency for all tested models on a single graph. 
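For example, a run restricted to the two standard models with a 10 ms latency ceiling might look like the following; the values shown are purely illustrative, and the variables themselves are described in the list that follows:

```bash
# Hypothetical configuration; see the variable descriptions below.
MODELS='small_model large_model' \
BATCHES='1 16 128' \
MAX_LATENCY=10 \
./qa/run_benchmarks.sh
```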
49 | 50 | The benchmark script can be configured using a few different environment 51 | variables, summarized below: 52 | - `MODELS`: A space-separated list of the models to benchmark (defaults to 53 | standard benchmarking models) 54 | - `BATCHES`: A space-separated list of the batch sizes to use during 55 | benchmarking (defaults to `'1 16 128 1024'`) 56 | - `MAX_LATENCY`: The maximum latency (in ms) to explore during benchmarking 57 | (defaults to 5 ms) 58 | -------------------------------------------------------------------------------- /qa/L0_e2e/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from hypothesis import settings 4 | 5 | settings.register_profile("dev", max_examples=10) 6 | settings.register_profile("ci", max_examples=100) 7 | 8 | 9 | def pytest_addoption(parser): 10 | default_repo_path = os.path.join( 11 | os.path.dirname(os.path.abspath(__file__)), "model_repository" 12 | ) 13 | parser.addoption("--repo", action="store", default=default_repo_path) 14 | -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model-cpu/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../large.json -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model-cpu/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_CPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../large.json -------------------------------------------------------------------------------- /qa/benchmark_repo/large_model/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_GPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model-cpu/1/xgboost.json: 
-------------------------------------------------------------------------------- 1 | ../../small.json -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model-cpu/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_CPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model/1/xgboost.json: -------------------------------------------------------------------------------- 1 | ../../small.json -------------------------------------------------------------------------------- /qa/benchmark_repo/small_model/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "fil" 2 | max_batch_size: 6329 3 | input [ 4 | { 5 | name: "input__0" 6 | data_type: TYPE_FP32 7 | dims: [ 393 ] 8 | } 9 | ] 10 | output [ 11 | { 12 | name: "output__0" 13 | data_type: TYPE_FP32 14 | dims: [ 2 ] 15 | } 16 | ] 17 | instance_group [{ kind: KIND_GPU }] 18 | parameters [ 19 | { 20 | key: "model_type" 21 | value: { string_value: "xgboost_json" } 22 | }, 23 | { 24 | key: "predict_proba" 25 | value: { string_value: "true" } 26 | }, 27 | { 28 | key: "output_class" 29 | value: { string_value: "true" } 30 | }, 31 | { 32 | key: "threshold" 33 | value: { string_value: "0.5" } 34 | }, 35 | { 36 | key: "storage_type" 37 | value: { string_value: "AUTO" } 38 | } 39 | ] 40 | 41 | dynamic_batching { } 42 | -------------------------------------------------------------------------------- /qa/collate_benchmarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import re 4 | import sys 5 | 6 | import cudf 7 | import numpy as np 8 | from scipy.spatial import ConvexHull 9 | 10 | try: 11 | import matplotlib.pyplot as plt 12 | except ImportError: 13 | plt = None 14 | 15 | BATCH_FILE_RE = re.compile(r"([0-9]+)\.csv") 16 | SUMMARY_DIR_NAME = "summary" 17 | 18 | 19 | def gather_perf_reports(benchmark_dir): 20 | _, model_dirs, _ = next(os.walk(benchmark_dir)) 21 | for model in model_dirs: 22 | if model != SUMMARY_DIR_NAME: 23 | model_dir = os.path.join(benchmark_dir, model) 24 | for file_ in os.listdir(model_dir): 25 | file_match = BATCH_FILE_RE.match(file_) 26 | if file_match: 27 | batch = int(file_match.groups()[0]) 28 | data = cudf.read_csv(os.path.join(model_dir, file_)) 29 | yield model, batch, data 30 | 31 | 32 | def collate_raw_data(benchmark_dir): 33 | all_data = [] 34 | for model, batch, data in gather_perf_reports(benchmark_dir): 35 | annotations = cudf.DataFrame( 36 | {"Model": [model] * data.shape[0], "Batch Size": [batch] * data.shape[0]}, 37 | columns=("Model", "Batch Size"), 38 | ) 39 | all_data.append(cudf.concat([annotations, data], axis=1)) 40 
| return cudf.concat(all_data, axis=0, ignore_index=True) 41 | 42 | 43 | def pts_to_line(pt1, pt2): 44 | slope = (pt2[1] - pt1[1]) / (pt2[0] - pt1[0]) 45 | intercept = pt1[1] - slope * pt1[0] 46 | return (slope, intercept) 47 | 48 | 49 | def scatter_to_hull(pts): 50 | hull = ConvexHull(pts) 51 | pts = pts[hull.vertices] 52 | pts = pts[pts[:, 0].argsort(), :] 53 | slope, intercept = pts_to_line(pts[0, :], pts[-1, :]) 54 | filtered_pts = pts[pts[:, 1] >= slope * pts[:, 0] + intercept] 55 | return np.concatenate((pts[(0,), :], filtered_pts, pts[(-1,), :])) 56 | 57 | 58 | def plot_lat_tp(data, latency_percentile=99): 59 | all_models = data["Model"].unique().to_pandas() 60 | plt.xscale("log") 61 | plt.yscale("log") 62 | for model in all_models: 63 | model_data = raw_data.loc[data["Model"] == model].to_pandas() 64 | hull = scatter_to_hull( 65 | model_data[[f"p{latency_percentile} latency", "Inferences/Second"]].values 66 | ) 67 | plt.plot(hull[:, 0], hull[:, 1], "-", label=model) 68 | plt.title("Throughput vs. Latency (log-log)") 69 | plt.xlabel("p99 Latency (microseconds)") 70 | plt.ylabel("Throughput (samples/s)") 71 | plt.legend(all_models) 72 | 73 | 74 | def plot_througput(data, budget, output_dir): 75 | filtered_data = data[data["p99 latency"] <= budget][["Model", "Inferences/Second"]] 76 | maximums = filtered_data.groupby("Model").max() 77 | maximums.sort_index(inplace=True) 78 | 79 | budget_ms = round(budget / 1000) 80 | 81 | raw_data.to_csv(os.path.join(output_dir, f"{budget_ms}.csv")) 82 | 83 | if plt is not None: 84 | plt.bar(maximums.index.values_host, maximums["Inferences/Second"].values_host) 85 | plt.xticks(rotation=90) 86 | plt.title(f"Throughput for p99 latency budget of {budget_ms} ms") 87 | plt.subplots_adjust(bottom=0.35) 88 | plt.savefig(os.path.join(output_dir, f"{budget_ms}.png")) 89 | plt.close() 90 | 91 | 92 | if __name__ == "__main__": 93 | benchmark_dir = sys.argv[1] 94 | raw_data = collate_raw_data(benchmark_dir) 95 | summary_dir = os.path.join(benchmark_dir, SUMMARY_DIR_NAME) 96 | throughput_dir = os.path.join(summary_dir, "throughput") 97 | os.makedirs(throughput_dir, exist_ok=True) 98 | raw_data.to_csv(os.path.join(summary_dir, "raw_data.csv")) 99 | 100 | try: 101 | latency_cutoff = float(os.environ["MAX_LATENCY"]) 102 | raw_data = raw_data[raw_data["p99 latency"] <= (latency_cutoff * 1000)] 103 | except KeyError: 104 | pass # No latency cutoff specified 105 | 106 | raw_data.to_csv(os.path.join(summary_dir, "filtered_data.csv")) 107 | 108 | plot_througput(raw_data, 1000, throughput_dir) 109 | plot_througput(raw_data, 5000, throughput_dir) 110 | plot_througput(raw_data, 20000, throughput_dir) 111 | 112 | if plt is not None: 113 | plot_lat_tp(raw_data) 114 | plt.savefig(os.path.join(summary_dir, "latency_throughput.png")) 115 | -------------------------------------------------------------------------------- /qa/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | QA_DIR=$(cd $(dirname $0); pwd) 19 | TEST_SCRIPT="$QA_DIR/run_tests.sh" 20 | 21 | if [[ $TRITON_ENABLE_GPU != "OFF" ]] 22 | then 23 | echo 'Running tests for GPU models...' 24 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" "$TEST_SCRIPT" 25 | echo 'Running tests for CPU models...' 26 | MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" "$TEST_SCRIPT" 27 | fi 28 | 29 | echo 'Running tests without visible GPUs...' 30 | CPU_ONLY=1 MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" "$TEST_SCRIPT" 31 | -------------------------------------------------------------------------------- /qa/generate_example_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | RETRAIN=${RETRAIN:-0} 19 | 20 | QA_DIR=$(cd $(dirname $0); pwd) 21 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" 22 | CPU_MODEL_REPO="${QA_DIR}/L0_e2e/cpu_model_repository" 23 | 24 | SCRIPTS_DIR="${QA_DIR}/../scripts" 25 | GENERATOR_SCRIPT="python ${QA_DIR}/L0_e2e/generate_example_model.py" 26 | 27 | SKLEARN_CONVERTER="${SCRIPTS_DIR}/convert_sklearn.py" 28 | CUML_CONVERTER="${SCRIPTS_DIR}/convert_cuml.py" 29 | 30 | models=() 31 | 32 | name=xgboost 33 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 34 | then 35 | ${GENERATOR_SCRIPT} \ 36 | --name $name \ 37 | --depth 11 \ 38 | --trees 2000 \ 39 | --classes 3 \ 40 | --features 500 \ 41 | --storage_type SPARSE 42 | models+=( $name ) 43 | fi 44 | 45 | name=xgboost_json 46 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 47 | then 48 | ${GENERATOR_SCRIPT} \ 49 | --name $name \ 50 | --format xgboost_json \ 51 | --depth 7 \ 52 | --trees 500 \ 53 | --features 500 \ 54 | --predict_proba 55 | models+=( $name ) 56 | fi 57 | 58 | name=xgboost_ubj 59 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 60 | then 61 | ${GENERATOR_SCRIPT} \ 62 | --name $name \ 63 | --format xgboost_ubj \ 64 | --depth 7 \ 65 | --trees 500 \ 66 | --features 500 \ 67 | --predict_proba 68 | models+=( $name ) 69 | fi 70 | 71 | name=xgboost_shap 72 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 73 | then 74 | ${GENERATOR_SCRIPT} \ 75 | --name $name \ 76 | --depth 11 \ 77 | --trees 2000 \ 78 | --classes 3 \ 79 | --features 500 \ 80 | --storage_type SPARSE \ 81 | --max_batch_size 4096 82 | models+=( $name ) 83 | fi 84 | 85 | name=lightgbm 86 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 87 | then 88 | ${GENERATOR_SCRIPT} \ 89 | --name $name \ 90 | --format lightgbm \ 91 | --type lightgbm \ 92 | --depth 3 \ 93 | --trees 2000 \ 94 | --cat_features 3 \ 95 | --predict_proba \ 96 | --disable_experimental_optimizations 97 | models+=( $name ) 98 | fi 99 | 100 | name=lightgbm_rf 101 | if [ $RETRAIN -ne 0 ] || [ ! 
-d "${MODEL_REPO}/${name}" ] 102 | then 103 | ${GENERATOR_SCRIPT} \ 104 | --name $name \ 105 | --format lightgbm \ 106 | --type lightgbm_rf \ 107 | --depth 10 \ 108 | --trees 20 \ 109 | --classes 10 \ 110 | --predict_proba 111 | models+=( $name ) 112 | fi 113 | 114 | name=regression 115 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 116 | then 117 | ${GENERATOR_SCRIPT} \ 118 | --name $name \ 119 | --format lightgbm \ 120 | --type lightgbm \ 121 | --depth 25 \ 122 | --trees 10 \ 123 | --features 400 \ 124 | --task regression 125 | models+=( $name ) 126 | fi 127 | 128 | name=sklearn 129 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 130 | then 131 | ${GENERATOR_SCRIPT} \ 132 | --name $name \ 133 | --type sklearn \ 134 | --depth 3 \ 135 | --trees 10 \ 136 | --features 500 \ 137 | --predict_proba 138 | models+=( $name ) 139 | fi 140 | $SKLEARN_CONVERTER "${MODEL_REPO}/${name}/1/model.pkl" 2>/dev/null 141 | 142 | name=cuml 143 | if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ] 144 | then 145 | ${GENERATOR_SCRIPT} \ 146 | --name $name \ 147 | --type cuml \ 148 | --depth 3 \ 149 | --trees 10 \ 150 | --max_batch_size 32768 \ 151 | --features 500 \ 152 | --task regression 153 | models+=( $name ) 154 | fi 155 | $CUML_CONVERTER "${MODEL_REPO}/${name}/1/model.pkl" 2>/dev/null 156 | 157 | mkdir -p "${CPU_MODEL_REPO}" 158 | cp -r "${MODEL_REPO}"/* "${CPU_MODEL_REPO}"/ 159 | 160 | if [ ! -z $OWNER_ID ] && [ ! -z $OWNER_GID ] 161 | then 162 | chown -R "${OWNER_ID}:${OWNER_GID}" "${MODEL_REPO}" 163 | chown -R "${OWNER_ID}:${OWNER_GID}" "${CPU_MODEL_REPO}" 164 | fi 165 | 166 | find "${CPU_MODEL_REPO}" -name 'config.pbtxt' -exec \ 167 | sed -i s/KIND_GPU/KIND_CPU/g {} + 168 | -------------------------------------------------------------------------------- /qa/run-clang-format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific languapge governing permissions and 14 | # limitations under the License. 15 | # 16 | # Note: This file was taken directly from 17 | # https://github.com/rapidsai/cuml/blob/branch-21.06/cpp/scripts/run-clang-format.py 18 | # with minor modifications. 19 | 20 | from __future__ import print_function 21 | 22 | import argparse 23 | import os 24 | import re 25 | import subprocess 26 | import sys 27 | import tempfile 28 | 29 | EXPECTED_VERSION = "11.1.0" 30 | VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") 31 | # NOTE: populate this list with more top-level dirs as we add more of them to 32 | # to the cuml repo 33 | DEFAULT_DIRS = ["src", "src/triton_fil"] 34 | 35 | 36 | def parse_args(): 37 | argparser = argparse.ArgumentParser("Runs clang-format on a project") 38 | argparser.add_argument( 39 | "-dstdir", 40 | type=str, 41 | default=None, 42 | help="Directory to store the temporary outputs of" 43 | " clang-format. 
If nothing is passed for this, then" 44 | " a temporary dir will be created using `mkdtemp`", 45 | ) 46 | argparser.add_argument( 47 | "-exe", type=str, default="clang-format", help="Path to clang-format exe" 48 | ) 49 | argparser.add_argument( 50 | "-inplace", 51 | default=False, 52 | action="store_true", 53 | help="Replace the source files itself.", 54 | ) 55 | argparser.add_argument( 56 | "-regex", 57 | type=str, 58 | default=r"[.](cu|cuh|h|hpp|cpp)$", 59 | help="Regex string to filter in sources", 60 | ) 61 | argparser.add_argument( 62 | "-ignore", 63 | type=str, 64 | default=r"cannylab/bh[.]cu$", 65 | help="Regex used to ignore files from matched list", 66 | ) 67 | argparser.add_argument( 68 | "-v", dest="verbose", action="store_true", help="Print verbose messages" 69 | ) 70 | argparser.add_argument( 71 | "dirs", type=str, nargs="*", help="List of dirs where to find sources" 72 | ) 73 | args = argparser.parse_args() 74 | args.regex_compiled = re.compile(args.regex) 75 | args.ignore_compiled = re.compile(args.ignore) 76 | if args.dstdir is None: 77 | args.dstdir = tempfile.mkdtemp() 78 | ret = subprocess.check_output("%s --version" % args.exe, shell=True) 79 | ret = ret.decode("utf-8") 80 | version = VERSION_REGEX.match(ret) 81 | if version is None: 82 | raise Exception("Failed to figure out clang-format version!") 83 | version = version.group(1) 84 | if version != EXPECTED_VERSION: 85 | raise Exception( 86 | f"clang-format exe must be v{EXPECTED_VERSION} found '{version}'" 87 | ) 88 | if len(args.dirs) == 0: 89 | args.dirs = DEFAULT_DIRS 90 | return args 91 | 92 | 93 | def list_all_src_files(file_regex, ignore_regex, srcdirs, dstdir, inplace): 94 | allFiles = [] 95 | for srcdir in srcdirs: 96 | for root, dirs, files in os.walk(srcdir): 97 | for f in files: 98 | if re.search(file_regex, f): 99 | src = os.path.join(root, f) 100 | if re.search(ignore_regex, src): 101 | continue 102 | if inplace: 103 | _dir = root 104 | else: 105 | _dir = os.path.join(dstdir, root) 106 | dst = os.path.join(_dir, f) 107 | allFiles.append((src, dst)) 108 | return allFiles 109 | 110 | 111 | def run_clang_format(src, dst, exe, verbose): 112 | dstdir = os.path.dirname(dst) 113 | if not os.path.exists(dstdir): 114 | os.makedirs(dstdir) 115 | # run the clang format command itself 116 | if src == dst: 117 | cmd = "%s -i %s" % (exe, src) 118 | else: 119 | cmd = "%s %s > %s" % (exe, src, dst) 120 | try: 121 | subprocess.check_call(cmd, shell=True) 122 | except subprocess.CalledProcessError: 123 | print("Failed to run clang-format! Maybe your env is not proper?") 124 | raise 125 | # run the diff to check if there are any formatting issues 126 | cmd = "diff -q %s %s >/dev/null" % (src, dst) 127 | try: 128 | subprocess.check_call(cmd, shell=True) 129 | if verbose: 130 | print("%s passed" % os.path.basename(src)) 131 | except subprocess.CalledProcessError: 132 | print( 133 | "{} failed! 'diff {} {}' will show formatting violations!".format( 134 | os.path.basename(src), src, dst 135 | ) 136 | ) 137 | return False 138 | return True 139 | 140 | 141 | def main(): 142 | args = parse_args() 143 | # Attempt to making sure that we run this script from root of repo always 144 | if not os.path.exists(".git"): 145 | print("Error!! 
This needs to always be run from the root of repo") 146 | sys.exit(-1) 147 | all_files = list_all_src_files( 148 | args.regex_compiled, args.ignore_compiled, args.dirs, args.dstdir, args.inplace 149 | ) 150 | # actual format checker 151 | status = True 152 | for src, dst in all_files: 153 | if not run_clang_format(src, dst, args.exe, args.verbose): 154 | status = False 155 | if not status: 156 | print("clang-format failed! You have 2 options:") 157 | print(" 1. Look at formatting differences above and fix them manually") 158 | print(" 2. Or run the below command to bulk-fix all these at once") 159 | print("Bulk-fix command: ") 160 | print( 161 | " python qa/run-clang-format.py {} -inplace".format(" ".join(sys.argv[1:])) 162 | ) 163 | sys.exit(-1) 164 | return 165 | 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /qa/run_benchmarks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODELS=${MODELS:-'small_model small_model-cpu large_model large_model-cpu'} 3 | BATCHES=${BATCHES:-'1 16 128 1024'} 4 | MAX_LATENCY=${MAX_LATENCY:-5} 5 | 6 | repo_root="$(git rev-parse --show-toplevel)" || repo_root="$PWD" 7 | if [ -z $OUTPUT ] 8 | then 9 | OUTPUT="$repo_root/qa/benchmark_output" 10 | fi 11 | 12 | if [ -z $SHARED_MEM ] 13 | then 14 | SHARED_MEM="none" 15 | fi 16 | 17 | run_benchmark() { 18 | model="$1" 19 | batch="$2" 20 | output_dir="$OUTPUT/$model" 21 | if [ ! -d "$output_dir" ] 22 | then 23 | mkdir -p "$output_dir" 24 | fi 25 | 26 | output_file="$output_dir/$batch.csv" 27 | perf_analyzer \ 28 | -i GRPC \ 29 | --shared-memory $SHARED_MEM \ 30 | --percentile 99 \ 31 | --binary-search \ 32 | --concurrency-range 1:64:2 \ 33 | -l "$MAX_LATENCY" \ 34 | -m "$model" \ 35 | -b "$batch" \ 36 | -f "$output_file" 37 | } 38 | 39 | for model in $MODELS 40 | do 41 | for batch in $BATCHES 42 | do 43 | run_benchmark "$model" "$batch" 44 | done 45 | done 46 | 47 | python3 $repo_root/qa/collate_benchmarks.py $OUTPUT 48 | -------------------------------------------------------------------------------- /qa/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | QA_DIR=$(cd $(dirname $0); pwd) 19 | SERVER_ARGS="" 20 | UUID="$(cat /proc/sys/kernel/random/uuid)" 21 | CONTAINER_NAME="fil_backend-ci-$UUID" 22 | DOCKER_RUN=0 23 | DOCKER_ARGS="-d -p 8000:8000 -p 8001:8001 -p 8002:8002 --name ${CONTAINER_NAME}" 24 | TRITON_PID='' 25 | LOG_DIR="${QA_DIR}/logs" 26 | SERVER_LOG="${LOG_DIR}/${UUID}-server.log" 27 | TEST_PROFILE=${TEST_PROFILE:-ci} 28 | 29 | if [ ! 
-d "${LOG_DIR}" ] 30 | then 31 | mkdir -p "${LOG_DIR}" 32 | fi 33 | 34 | if [ -z $MODEL_REPO ] 35 | then 36 | MODEL_REPO="${QA_DIR}/L0_e2e/model_repository" 37 | fi 38 | MODEL_REPO="$(readlink -f $MODEL_REPO)" 39 | 40 | DOCKER_ARGS="${DOCKER_ARGS} -v ${MODEL_REPO}:/models" 41 | 42 | if [ -z $CPU_ONLY ] || [ $CPU_ONLY -eq 0 ] 43 | then 44 | if [ -z $CUDA_VISIBLE_DEVICES ] 45 | then 46 | DOCKER_ARGS="${DOCKER_ARGS} --gpus all" 47 | TRITON_VISIBLE_DEVICES='all' 48 | else 49 | DOCKER_ARGS="${DOCKER_ARGS} --gpus ${CUDA_VISIBLE_DEVICES}" 50 | TRITON_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}" 51 | fi 52 | else 53 | TRITON_VISIBLE_DEVICES='' 54 | fi 55 | 56 | # If a Triton Docker image has been provided or no tritonserver executable is 57 | # available, run the server via Docker 58 | if [ ! -z $TRITON_IMAGE ] || ! command -v tritonserver 59 | then 60 | DOCKER_RUN=1 61 | TRITON_IMAGE=${TRITON_IMAGE:-rapids_triton_identity} 62 | SERVER_ARGS="${SERVER_ARGS} --model-repository=/models" 63 | else 64 | SERVER_ARGS="${SERVER_ARGS} --model-repository=${MODEL_REPO}" 65 | fi 66 | 67 | start_server() { 68 | if [ $DOCKER_RUN -eq 1 ] 69 | then 70 | docker run $DOCKER_ARGS $TRITON_IMAGE > /dev/null 71 | else 72 | if [ -z $TRITON_VISIBLE_DEVICES ] 73 | then 74 | CUDA_VISIBLE_DEVICES='' tritonserver $SERVER_ARGS > $SERVER_LOG 2>&1 & 75 | else 76 | tritonserver $SERVER_ARGS > $SERVER_LOG 2>&1 & 77 | fi 78 | TRITON_PID="$!" 79 | fi 80 | } 81 | 82 | [ ${START_SERVER:-1} -eq 1 ] && start_server || true 83 | 84 | # TODO (wphicks): Run linters 85 | 86 | finally() { 87 | if [ ${START_SERVER:-1} -eq 1 ] 88 | then 89 | if [ -z $TRITON_PID ] 90 | then 91 | docker logs $CONTAINER_NAME > $SERVER_LOG 2>&1 92 | docker rm -f $CONTAINER_NAME > /dev/null 2>&1 93 | else 94 | kill -15 $TRITON_PID 95 | wait 96 | fi 97 | fi 98 | } 99 | 100 | trap finally EXIT 101 | 102 | if [ ! -z $CPU_ONLY ] && [ $CPU_ONLY -eq 1 ] 103 | then 104 | pytest \ 105 | --repo "${MODEL_REPO}" \ 106 | --hypothesis-profile "$TEST_PROFILE" \ 107 | "$QA_DIR" 108 | else 109 | pytest --repo "${MODEL_REPO}" "$QA_DIR" --hypothesis-profile "$TEST_PROFILE" 110 | fi 111 | -------------------------------------------------------------------------------- /scripts/convert_cuml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """cuML RF to Treelite checkpoint converter 17 | 18 | Given a path to a pickle file containing a cuML random forest model, this 19 | script will generate a Treelite checkpoint file representation of the model in 20 | the same directory. 
21 | """ 22 | 23 | import argparse 24 | import pathlib 25 | import pickle 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("pickle_file", help="Path to the pickle file to convert") 30 | args = parser.parse_args() 31 | 32 | with open(args.pickle_file, "rb") as f: 33 | model = pickle.load(f) 34 | 35 | model_dir = pathlib.Path(args.pickle_file).resolve().parent 36 | out_path = model_dir / "checkpoint.tl" 37 | 38 | model.convert_to_treelite_model().to_treelite_checkpoint(str(out_path)) 39 | -------------------------------------------------------------------------------- /scripts/convert_sklearn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) 2021, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """sklearn RF/GBDT to Treelite checkpoint converter 18 | 19 | Given a path to a pickle file containing a scikit-learn random forest (or 20 | gradient boosting) model, this script will generate a Treelite checkpoint file 21 | representation of the model in the same directory. 22 | """ 23 | 24 | import argparse 25 | import pathlib 26 | import pickle 27 | 28 | import treelite 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument("pickle_file", help="Path to the pickle file to convert") 33 | args = parser.parse_args() 34 | 35 | with open(args.pickle_file, "rb") as f: 36 | model = pickle.load(f) 37 | 38 | model_dir = pathlib.Path(args.pickle_file).resolve().parent 39 | out_path = model_dir / "checkpoint.tl" 40 | 41 | tl_model = treelite.sklearn.import_model(model) 42 | tl_model.serialize(out_path) 43 | -------------------------------------------------------------------------------- /scripts/environment.yml: -------------------------------------------------------------------------------- 1 | name: triton_scripts 2 | channels: 3 | - conda-forge 4 | - nvidia 5 | - rapidsai 6 | dependencies: 7 | - cuda-version=12.8 8 | - cuml=25.04 9 | - python 10 | - scikit-learn>=1.5 11 | - treelite>=4.4 12 | -------------------------------------------------------------------------------- /src/api.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | namespace triton { namespace backend { namespace NAMESPACE { 35 | 36 | using ModelState = rapids::TritonModelState; 37 | using ModelInstanceState = 38 | rapids::ModelInstanceState; 39 | 40 | extern "C" { 41 | 42 | /** Confirm that backend is compatible with Triton's backend API version 43 | */ 44 | TRITONSERVER_Error* 45 | TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) 46 | { 47 | return rapids::triton_api::initialize(backend); 48 | } 49 | 50 | TRITONSERVER_Error* 51 | TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) 52 | { 53 | return rapids::triton_api::model_initialize(model); 54 | } 55 | 56 | TRITONSERVER_Error* 57 | TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) 58 | { 59 | return rapids::triton_api::model_finalize(model); 60 | } 61 | 62 | TRITONSERVER_Error* 63 | TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) 64 | { 65 | return rapids::triton_api::instance_initialize< 66 | ModelState, ModelInstanceState>(instance); 67 | } 68 | 69 | TRITONSERVER_Error* 70 | TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) 71 | { 72 | return rapids::triton_api::instance_finalize(instance); 73 | } 74 | 75 | TRITONSERVER_Error* 76 | TRITONBACKEND_ModelInstanceExecute( 77 | TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** raw_requests, 78 | uint32_t const request_count) 79 | { 80 | return rapids::triton_api::execute( 81 | instance, raw_requests, static_cast(request_count)); 82 | } 83 | 84 | } // extern "C" 85 | 86 | }}} // namespace triton::backend::NAMESPACE 87 | -------------------------------------------------------------------------------- /src/cpu_forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace triton { namespace backend { namespace NAMESPACE { 29 | 30 | template <> 31 | struct ForestModel { 32 | ForestModel() = default; 33 | ForestModel(std::shared_ptr tl_model) : tl_model_{tl_model} {} 34 | 35 | void predict( 36 | rapids::Buffer& output, rapids::Buffer const& input, 37 | std::size_t samples, bool predict_proba) const 38 | { 39 | tl_model_->predict(output, input, samples, predict_proba); 40 | } 41 | 42 | 43 | private: 44 | std::shared_ptr tl_model_; 45 | }; 46 | 47 | }}} // namespace triton::backend::NAMESPACE 48 | -------------------------------------------------------------------------------- /src/fil_config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | namespace triton { namespace backend { namespace NAMESPACE { 26 | 27 | namespace detail { 28 | 29 | inline auto 30 | name_to_tl_algo(std::string const& name) 31 | { 32 | auto result = ML::fil::algo_t{}; 33 | if (name == "ALGO_AUTO") { 34 | result = ML::fil::algo_t::ALGO_AUTO; 35 | } else if (name == "NAIVE") { 36 | result = ML::fil::algo_t::NAIVE; 37 | } else if (name == "TREE_REORG") { 38 | result = ML::fil::algo_t::TREE_REORG; 39 | } else if (name == "BATCH_TREE_REORG") { 40 | result = ML::fil::algo_t::BATCH_TREE_REORG; 41 | } else { 42 | auto log_stream = std::stringstream{}; 43 | log_stream << "Unknown FIL algorithm name: " << name; 44 | throw rapids::TritonException(rapids::Error::InvalidArg, log_stream.str()); 45 | } 46 | 47 | return result; 48 | } 49 | 50 | inline auto 51 | name_to_storage_type(std::string const& name) 52 | { 53 | auto result = ML::fil::storage_type_t{}; 54 | if (name == "AUTO") { 55 | result = ML::fil::storage_type_t::AUTO; 56 | } else if (name == "DENSE") { 57 | result = ML::fil::storage_type_t::DENSE; 58 | } else if (name == "SPARSE") { 59 | result = ML::fil::storage_type_t::SPARSE; 60 | } else if (name == "SPARSE8") { 61 | result = ML::fil::storage_type_t::SPARSE8; 62 | } else { 63 | auto log_stream = std::stringstream{}; 64 | log_stream << "Unknown FIL storage type name: " << name; 65 | throw rapids::TritonException(rapids::Error::InvalidArg, log_stream.str()); 66 | } 67 | 68 | return result; 69 | } 70 | 71 | } // namespace detail 72 | 73 | inline auto 74 | tl_to_fil_config(treelite_config const& tl_config) 75 | { 76 | return ML::fil::treelite_params_t{ 77 | detail::name_to_tl_algo(tl_config.algo), 78 | tl_config.output_class, 79 | tl_config.threshold, 80 | detail::name_to_storage_type(tl_config.storage_type), 81 | tl_config.blocks_per_sm, 82 | tl_config.threads_per_tree, 83 | 0, 84 | nullptr, 85 | ML::fil::precision_t::PRECISION_FLOAT32}; 86 | } 87 | 88 | 
}}} // namespace triton::backend::NAMESPACE 89 | -------------------------------------------------------------------------------- /src/forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #ifdef TRITON_ENABLE_GPU 20 | #include 21 | #endif 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace triton { namespace backend { namespace NAMESPACE { 33 | 34 | /* This struct defines a unified prediction interface to both FIL and GTIL. 35 | * Template specializations are provided based on the type of memory the model 36 | * is expected to process */ 37 | template 38 | struct ForestModel { 39 | using device_id_t = int; 40 | 41 | ForestModel(std::shared_ptr tl_model) 42 | { 43 | throw rapids::TritonException( 44 | rapids::Error::Unsupported, 45 | "ForestModel invoked with a memory type unsupported by this build"); 46 | } 47 | 48 | ForestModel( 49 | device_id_t device_id, cudaStream_t stream, 50 | std::shared_ptr tl_model) 51 | { 52 | throw rapids::TritonException( 53 | rapids::Error::Unsupported, 54 | "ForestModel invoked with a memory type unsupported by this build"); 55 | } 56 | 57 | void predict( 58 | rapids::Buffer& output, rapids::Buffer const& input, 59 | std::size_t samples, bool predict_proba) const 60 | { 61 | throw rapids::TritonException( 62 | rapids::Error::Unsupported, 63 | "ForestModel invoked with a memory type unsupported by this build"); 64 | } 65 | }; 66 | }}} // namespace triton::backend::NAMESPACE 67 | -------------------------------------------------------------------------------- /src/gpu_forest_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace triton { namespace backend { namespace NAMESPACE { 33 | 34 | using fil_forest_t = ML::fil::forest_t; 35 | 36 | template <> 37 | struct ForestModel { 38 | using device_id_t = int; 39 | ForestModel( 40 | device_id_t device_id, cudaStream_t stream, 41 | std::shared_ptr tl_model) 42 | : device_id_{device_id}, raft_handle_{stream}, tl_model_{tl_model}, 43 | fil_forest_{[this]() { 44 | auto result = fil_forest_t{}; 45 | auto variant_result = ML::fil::forest_variant{}; 46 | auto config = tl_to_fil_config(tl_model_->config()); 47 | ML::fil::from_treelite( 48 | raft_handle_, &variant_result, tl_model_->handle(), &config); 49 | try { 50 | result = std::get(variant_result); 51 | } 52 | catch (std::bad_variant_access const& err) { 53 | throw rapids::TritonException( 54 | rapids::Error::Internal, 55 | "Model did not load with expected precision"); 56 | } 57 | return result; 58 | }()} 59 | { 60 | } 61 | 62 | ForestModel(ForestModel const& other) = default; 63 | ForestModel& operator=(ForestModel const& other) = default; 64 | ForestModel(ForestModel&& other) = default; 65 | ForestModel& operator=(ForestModel&& other) = default; 66 | 67 | ~ForestModel() noexcept { ML::fil::free(raft_handle_, fil_forest_); } 68 | 69 | void predict( 70 | rapids::Buffer& output, rapids::Buffer const& input, 71 | std::size_t samples, bool predict_proba) const 72 | { 73 | ML::fil::predict( 74 | raft_handle_, fil_forest_, output.data(), input.data(), samples, 75 | predict_proba); 76 | } 77 | 78 | private: 79 | raft::handle_t raft_handle_; 80 | std::shared_ptr tl_model_; 81 | fil_forest_t fil_forest_; 82 | device_id_t device_id_; 83 | }; 84 | 85 | }}} // namespace triton::backend::NAMESPACE 86 | -------------------------------------------------------------------------------- /src/gpu_treeshap_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | namespace triton { namespace backend { namespace NAMESPACE { 31 | 32 | template <> 33 | struct TreeShapModel { 34 | using device_id_t = int; 35 | TreeShapModel( 36 | device_id_t device_id, cudaStream_t stream, 37 | std::shared_ptr tl_model) 38 | : device_id_{device_id}, raft_handle_{stream}, tl_model_{tl_model}, 39 | path_info_{ML::Explainer::extract_path_info(tl_model_->handle())} 40 | { 41 | } 42 | 43 | TreeShapModel(TreeShapModel const& other) = default; 44 | TreeShapModel& operator=(TreeShapModel const& other) = default; 45 | TreeShapModel(TreeShapModel&& other) = default; 46 | TreeShapModel& operator=(TreeShapModel&& other) = default; 47 | 48 | void predict( 49 | rapids::Buffer& output, rapids::Buffer const& input, 50 | std::size_t n_rows, std::size_t n_cols) const 51 | { 52 | // Need to synchronize on the stream because treeshap currently does not 53 | // take a stream on its API 54 | input.stream_synchronize(); 55 | ML::Explainer::gpu_treeshap( 56 | path_info_, 57 | ML::Explainer::FloatPointer(const_cast(input.data())), n_rows, 58 | n_cols, ML::Explainer::FloatPointer(output.data()), output.size()); 59 | output.stream_synchronize(); 60 | } 61 | 62 | private: 63 | raft::handle_t raft_handle_; 64 | std::shared_ptr tl_model_; 65 | device_id_t device_id_; 66 | ML::Explainer::TreePathHandle path_info_; 67 | }; 68 | 69 | }}} // namespace triton::backend::NAMESPACE 70 | -------------------------------------------------------------------------------- /src/herring/node.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include 20 | 21 | namespace herring { 22 | /* Summary of Types 23 | * ---------------- 24 | * value_t (float or double): The value used for testing a node condition or 25 | * for providing the output of leaves. 26 | * feature_index_t (std::uint16_t or std::uint32_t): Index indicating which 27 | * feature this conditional applies to 28 | * offset_t (std::uint16_t or std::uint32_t): Offset between this node and 29 | * its distant child. For small trees, using a smaller type can reduce the 30 | * padded size of the node to as few as 8 bytes. 31 | * output_index_t (typically std::uint32_t): If leaf output values cannot be 32 | * stored in the same memory as test condition values, this index provides a 33 | * lookup location for output values stored in the tree. 
34 | */ 35 | template < 36 | typename value_t, typename feature_index_t, typename offset_t, 37 | typename output_index_t> 38 | struct simple_node { 39 | using value_type = value_t; // float or double 40 | using index_type = feature_index_t; 41 | using offset_type = offset_t; 42 | using output_index_type = output_index_t; 43 | using category_set_type = 44 | std::bitset; 45 | // Cannot use std::variant here because it takes up 4 additional bytes when 46 | // value_type is float 47 | union value_or_index { 48 | value_type value; 49 | output_index_type index; 50 | category_set_type categories; 51 | }; 52 | value_or_index value; // 4 bytes for float 53 | offset_type 54 | distant_offset; // 2 bytes for depth < 16 or small trees; 4 otherwise 55 | index_type feature; // 1-4 bytes, depending on number of features 56 | 57 | simple_node() : value{value_type{}}, distant_offset{}, feature{} {} 58 | }; 59 | 60 | template < 61 | bool categorical, bool inclusive_threshold, typename value_t, 62 | typename feature_index_t, typename offset_t, typename output_index_t> 63 | auto 64 | evaluate_node( 65 | simple_node const& node, 66 | float feature_value) 67 | { 68 | auto condition = false; 69 | if constexpr (categorical) { 70 | if (feature_value >= 0 && feature_value < node.value.categories.size()) { 71 | // NOTE: This cast aligns with the convention used in LightGBM and 72 | // other frameworks to cast floats when converting to integral 73 | // categories. This can have surprising effects with floating point 74 | // arithmetic, but it is kept this way for now in order to provide 75 | // consistency with results obtained from the training frameworks. 76 | condition = 77 | node.value.categories[static_cast(feature_value)]; 78 | } 79 | } else { 80 | if constexpr (inclusive_threshold) { 81 | condition = (feature_value <= node.value.value); 82 | } else { 83 | condition = (feature_value < node.value.value); 84 | } 85 | } 86 | 87 | // This narrowing conversion is guaranteed safe because distant_offset 88 | // cannot be 0 89 | // TODO(wphicks): Guarantee this with custom types 90 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 91 | #pragma GCC diagnostic push 92 | #pragma GCC diagnostic ignored "-Wnarrowing" 93 | return offset_t{1 + condition * (node.distant_offset - 1)}; 94 | #pragma GCC diagnostic pop 95 | } 96 | 97 | template < 98 | bool categorical, bool inclusive_threshold, typename value_t, 99 | typename feature_index_t, typename offset_t, typename output_index_t> 100 | auto 101 | evaluate_node( 102 | simple_node const& node, 103 | float const* row) 104 | { 105 | auto feature_value = *(row + node.feature); 106 | return evaluate_node(node, feature_value); 107 | } 108 | } // namespace herring 109 | -------------------------------------------------------------------------------- /src/herring/omp_helpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | 20 | template 21 | struct thread_count { 22 | thread_count() : value{omp_get_max_threads()} {} 23 | thread_count(T t) 24 | : value{[](T t) { 25 | auto result = T{t}; 26 | auto max_count = omp_get_max_threads(); 27 | if (t < 1 || t > max_count) { 28 | result = max_count; 29 | } 30 | return result; 31 | }(t)} 32 | { 33 | } 34 | operator int() const { return static_cast(value); } 35 | 36 | private: 37 | T value; 38 | }; 39 | -------------------------------------------------------------------------------- /src/herring/output_ops.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace herring { 20 | 21 | /* Enum representing possible element-wise operations on output */ 22 | enum class element_op { 23 | disable, 24 | signed_square, 25 | hinge, 26 | sigmoid, 27 | exponential, 28 | exponential_standard_ratio, 29 | logarithm_one_plus_exp 30 | }; 31 | 32 | /* Enum representing possible row-wise operations on output */ 33 | enum class row_op { disable, softmax, max_index }; 34 | } // namespace herring 35 | -------------------------------------------------------------------------------- /src/herring/tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "herring/type_helpers.hpp" 25 | 26 | namespace herring { 27 | /* A tree that can just return the stored value of nodes as its output */ 28 | template < 29 | typename value_t, typename feature_index_t, typename offset_t, 30 | typename output_index_t, typename output_t> 31 | struct simple_tree { 32 | using node_type = 33 | simple_node; 34 | using output_type = output_t; 35 | std::vector nodes; 36 | std::vector default_distant; 37 | std::vector categorical_node; 38 | bool has_categorical_nodes; 39 | 40 | auto get_leaf_value(node_type const& node) const 41 | { 42 | if constexpr (std::is_same_v) { 43 | return node.value.value; 44 | } else { 45 | static_assert(std::is_same_v); 46 | return node.value.index; 47 | } 48 | } 49 | 50 | auto get_leaf_value(std::size_t node_index) const 51 | { 52 | return get_leaf_value(nodes[node_index]); 53 | } 54 | 55 | template < 56 | bool missing_values_in_row, bool categorical_model, 57 | bool inclusive_threshold> 58 | auto evaluate_tree_node(std::size_t node_index, float const* row) const 59 | { 60 | auto result = offset_t{}; 61 | if constexpr (categorical_model) { 62 | if (!has_categorical_nodes) { 63 | result = evaluate_tree_node_< 64 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 65 | } else { 66 | result = evaluate_tree_node_< 67 | missing_values_in_row, true, inclusive_threshold>(node_index, row); 68 | } 69 | } else { 70 | result = evaluate_tree_node_< 71 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 72 | } 73 | return result; 74 | }; 75 | 76 | private: 77 | template < 78 | bool missing_values_in_row, bool categorical_tree, 79 | bool inclusive_threshold> 80 | auto evaluate_tree_node_(std::size_t node_index, float const* row) const 81 | { 82 | auto const& node = nodes[node_index]; 83 | auto result = offset_t{}; 84 | if constexpr (missing_values_in_row) { 85 | auto feature_value = *(row + node.feature); 86 | auto present = !std::isnan(feature_value); 87 | if (present) { 88 | if constexpr (categorical_tree) { 89 | if (!categorical_node[node_index]) { 90 | result = 91 | evaluate_node(node, feature_value); 92 | } else { 93 | result = 94 | evaluate_node(node, feature_value); 95 | } 96 | } else { 97 | result = 98 | evaluate_node(node, feature_value); 99 | } 100 | } else { 101 | // This narrowing conversion is guaranteed safe because distant_offset 102 | // cannot be 0 103 | // TODO(wphicks): Guarantee this with custom types 104 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 105 | #pragma GCC diagnostic push 106 | #pragma GCC diagnostic ignored "-Wnarrowing" 107 | result = 1 + (node.distant_offset - 1) * default_distant[node_index]; 108 | #pragma GCC diagnostic pop 109 | } 110 | } else { 111 | if constexpr (categorical_tree) { 112 | if (!categorical_node[node_index]) { 113 | result = evaluate_node(node, row); 114 | } else { 115 | result = evaluate_node(node, row); 116 | } 117 | } else { 118 | result = evaluate_node(node, row); 119 | } 120 | } 121 | return result; 122 | } 123 | }; 124 | 125 | 126 | /* A tree that must look up its output values in separate storage */ 127 | template < 128 | typename value_t, typename feature_index_t, typename offset_t, 129 | typename output_index_t, typename output_t> 130 | struct lookup_tree { 131 | using node_type = 132 | simple_node; 133 | using output_type = output_t; 134 | std::vector nodes; 135 | std::vector leaf_outputs; 136 | std::vector 
default_distant; 137 | std::vector categorical_node; 138 | bool has_categorical_nodes; 139 | 140 | template < 141 | typename tree_output_type = output_t, 142 | std::enable_if_t< 143 | is_container_specialization::value, 144 | bool> = true> 145 | auto const& get_leaf_value(node_type const& node) const 146 | { 147 | return leaf_outputs[node.value.index]; 148 | } 149 | 150 | template < 151 | typename tree_output_type = output_t, 152 | std::enable_if_t< 153 | !is_container_specialization::value, 154 | bool> = true> 155 | auto get_leaf_value(node_type const& node) const 156 | { 157 | return leaf_outputs[node.value.index]; 158 | } 159 | 160 | auto get_leaf_value(std::size_t node_id) const 161 | { 162 | return leaf_outputs[nodes[node_id].value.index]; 163 | } 164 | 165 | template < 166 | bool missing_values_in_row, bool categorical_model, 167 | bool inclusive_threshold> 168 | auto evaluate_tree_node(std::size_t node_index, float const* row) const 169 | { 170 | auto result = offset_t{}; 171 | if constexpr (categorical_model) { 172 | if (!has_categorical_nodes) { 173 | result = evaluate_tree_node_< 174 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 175 | } else { 176 | result = evaluate_tree_node_< 177 | missing_values_in_row, true, inclusive_threshold>(node_index, row); 178 | } 179 | } else { 180 | result = evaluate_tree_node_< 181 | missing_values_in_row, false, inclusive_threshold>(node_index, row); 182 | } 183 | return result; 184 | }; 185 | 186 | private: 187 | template < 188 | bool missing_values_in_row, bool categorical_tree, 189 | bool inclusive_threshold> 190 | auto evaluate_tree_node_(std::size_t node_index, float const* row) const 191 | { 192 | auto const& node = nodes[node_index]; 193 | auto result = offset_t{}; 194 | if constexpr (missing_values_in_row) { 195 | auto feature_value = *(row + node.feature); 196 | auto present = !std::isnan(feature_value); 197 | if (present) { 198 | if constexpr (categorical_tree) { 199 | if (!categorical_node[node_index]) { 200 | result = 201 | evaluate_node(node, feature_value); 202 | } else { 203 | result = 204 | evaluate_node(node, feature_value); 205 | } 206 | } else { 207 | result = 208 | evaluate_node(node, feature_value); 209 | } 210 | } else { 211 | // This narrowing conversion is guaranteed safe because distant_offset 212 | // cannot be 0 213 | // TODO(wphicks): Guarantee this with custom types 214 | // (https://github.com/triton-inference-server/fil_backend/issues/204) 215 | #pragma GCC diagnostic push 216 | #pragma GCC diagnostic ignored "-Wnarrowing" 217 | result = 1 + (node.distant_offset - 1) * default_distant[node_index]; 218 | #pragma GCC diagnostic pop 219 | } 220 | } else { 221 | if constexpr (categorical_tree) { 222 | if (!categorical_node[node_index]) { 223 | result = evaluate_node(node, row); 224 | } else { 225 | result = evaluate_node(node, row); 226 | } 227 | } else { 228 | result = evaluate_node(node, row); 229 | } 230 | } 231 | return result; 232 | } 233 | }; 234 | } // namespace herring 235 | -------------------------------------------------------------------------------- /src/herring/type_helpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace herring { 22 | template class U> 23 | struct is_container_specialization : std::false_type { 24 | using value_type = T; 25 | }; 26 | 27 | template