├── .clang-format ├── .dockerignore ├── .github └── ISSUE_TEMPLATE │ └── bug_report.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── all_models ├── bert │ └── fastertransformer │ │ ├── 1 │ │ ├── 1-gpu │ │ │ └── config.ini │ │ └── 2-gpu │ │ │ └── config.ini │ │ └── config.pbtxt ├── bloom │ ├── ensemble │ │ ├── 1 │ │ │ └── .tmp │ │ └── config.pbtxt │ ├── fastertransformer │ │ ├── 1 │ │ │ └── config.ini │ │ └── config.pbtxt │ ├── postprocessing │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt │ └── preprocessing │ │ ├── 1 │ │ └── model.py │ │ └── config.pbtxt ├── gpt-interactive-text-generation │ ├── ensemble │ │ ├── 1 │ │ │ └── .tmp │ │ └── config.pbtxt │ ├── fastertransformer │ │ ├── 1 │ │ │ └── config.ini │ │ └── config.pbtxt │ ├── postprocessing │ │ ├── 1 │ │ │ ├── gpt2-merges.txt │ │ │ ├── gpt2-vocab.json │ │ │ ├── model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── gpt_token_encoder.py │ │ └── config.pbtxt │ └── preprocessing │ │ ├── 1 │ │ ├── gpt2-merges.txt │ │ ├── gpt2-vocab.json │ │ ├── model.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ └── gpt_token_encoder.py │ │ └── word_list.py │ │ └── config.pbtxt ├── gpt │ ├── ensemble │ │ ├── 1 │ │ │ └── .tmp │ │ └── config.pbtxt │ ├── fastertransformer │ │ ├── 1 │ │ │ ├── .tmp │ │ │ └── config.ini │ │ └── config.pbtxt │ ├── postprocessing │ │ ├── 1 │ │ │ ├── gpt2-merges.txt │ │ │ ├── gpt2-vocab.json │ │ │ ├── model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── gpt_token_encoder.py │ │ └── config.pbtxt │ └── preprocessing │ │ ├── 1 │ │ ├── gpt2-merges.txt │ │ ├── gpt2-vocab.json │ │ ├── model.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ └── gpt_token_encoder.py │ │ └── word_list.py │ │ └── config.pbtxt ├── gptj │ ├── ensemble │ │ ├── 1 │ │ │ └── .tmp │ │ └── config.pbtxt │ ├── fastertransformer │ │ ├── 1 │ │ │ ├── .tmp │ │ │ └── config.ini │ │ └── config.pbtxt │ ├── postprocessing │ │ ├── 1 │ │ │ ├── gpt2-merges.txt │ │ │ ├── gpt2-vocab.json │ │ │ ├── model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── gpt_token_encoder.py │ │ └── config.pbtxt │ └── preprocessing │ │ ├── 1 │ │ ├── gpt2-merges.txt │ │ ├── gpt2-vocab.json │ │ ├── model.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ └── gpt_token_encoder.py │ │ └── word_list.py │ │ └── config.pbtxt ├── gptneox │ ├── ensemble │ │ ├── 1 │ │ │ └── .tmp │ │ └── config.pbtxt │ ├── fastertransformer │ │ ├── 1 │ │ │ └── config.ini │ │ └── config.pbtxt │ ├── postprocessing │ │ ├── 1 │ │ │ ├── 20B_tokenizer.json │ │ │ └── model.py │ │ └── config.pbtxt │ └── preprocessing │ │ ├── 1 │ │ ├── 20B_tokenizer.json │ │ ├── model.py │ │ └── word_list.py │ │ └── config.pbtxt ├── t5-encoder │ ├── fastertransformer │ │ ├── 1 │ │ │ └── .gitignore │ │ └── config.pbtxt │ └── tokenizer │ │ ├── 1 │ │ └── model.py │ │ └── config.pbtxt └── t5 │ └── fastertransformer │ ├── 1 │ ├── 1-gpu │ │ └── config.ini │ └── config.ini │ └── config.pbtxt ├── cmake ├── Modules │ └── FindNCCL.cmake └── TritonFasterTransformerBackendConfig.cmake.in ├── docker ├── Dockerfile └── create_dockerfile_and_build.py ├── docs ├── bert_guide.md ├── gpt_guide.md ├── gptj_guide.md ├── gptneox_guide.md └── t5_guide.md ├── images └── multi_gpu_multi_node_workflow.png ├── src ├── libfastertransformer.cc └── libtriton_fastertransformer.ldscript └── tools ├── benchmark.py ├── benchmark_single_node.sh ├── bert ├── create_bert_config.sh └── identity_test.py ├── create_gpt_config.sh ├── create_gptj_config.sh ├── end_to_end_test.py ├── evaluate_lambada.py ├── gpt ├── create_gpt_config.sh ├── end_to_end_test.py ├── evaluate_lambada.py ├── gpt_prompt_learning_squad_task_eval.py ├── gpt_prompt_learning_squad_task_identity_test.py ├── gptneox_jp_test.py ├── identity_test.py └── xglm_test.py ├── gpt_prompt_learning_squad_task_eval.py ├── gpt_prompt_learning_squad_task_identity_test.py ├── identity_test.py ├── interactive_text_generation ├── create_interactive_gpt_config.sh ├── end_to_end_test.py ├── identity_test.py └── triton_out_baseline ├── issue_request.py ├── kill_server.sh ├── requests ├── sample_request.json ├── sample_request_ensemble.json ├── sample_request_runtime_top_k.json ├── sample_request_single.json ├── sample_request_single_t5.json ├── sample_request_single_t5_encoder.json ├── sample_request_single_t5_ia3.json ├── sample_request_stream.json ├── sample_request_stream_t5.json └── sample_request_tokenizer.json ├── run_client.sh ├── run_server.sh ├── t5_utils ├── boolq_test.jsonl ├── boolq_test.py ├── boolq_test_hf.py ├── create_t5_config.sh ├── create_t5_encoder_config.sh ├── summarization.py ├── t5_end_to_end_test.py ├── t5_mnli_task.py ├── t5_requirement.txt ├── t5_xnli_task.py ├── test.de └── test.en ├── util.sh ├── utils.sh └── utils ├── gpt_prompt_learning.py ├── gpt_token_encoder.py └── recover_bpe.py /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/.clang-format -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/.gitignore -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/README.md -------------------------------------------------------------------------------- /all_models/bert/fastertransformer/1/1-gpu/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bert/fastertransformer/1/1-gpu/config.ini -------------------------------------------------------------------------------- /all_models/bert/fastertransformer/1/2-gpu/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bert/fastertransformer/1/2-gpu/config.ini -------------------------------------------------------------------------------- /all_models/bert/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bert/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/bloom/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/bloom/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/ensemble/config.pbtxt -------------------------------------------------------------------------------- /all_models/bloom/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/bloom/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/bloom/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/postprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/bloom/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/bloom/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/preprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/bloom/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/bloom/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/ensemble/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/1/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/1/word_list.py -------------------------------------------------------------------------------- /all_models/gpt-interactive-text-generation/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt-interactive-text-generation/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gpt/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/ensemble/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt/fastertransformer/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gpt/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/gpt/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gpt/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/1/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/1/word_list.py -------------------------------------------------------------------------------- /all_models/gpt/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gpt/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptj/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gptj/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/ensemble/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptj/fastertransformer/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gptj/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/gptj/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gptj/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/gpt2-merges.txt -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/gpt2-vocab.json -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/utils/__init__.py -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/1/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/1/word_list.py -------------------------------------------------------------------------------- /all_models/gptj/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptj/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptneox/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/gptneox/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/ensemble/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptneox/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/gptneox/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptneox/postprocessing/1/20B_tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/postprocessing/1/20B_tokenizer.json -------------------------------------------------------------------------------- /all_models/gptneox/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/postprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gptneox/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/gptneox/preprocessing/1/20B_tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/preprocessing/1/20B_tokenizer.json -------------------------------------------------------------------------------- /all_models/gptneox/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/preprocessing/1/model.py -------------------------------------------------------------------------------- /all_models/gptneox/preprocessing/1/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/preprocessing/1/word_list.py -------------------------------------------------------------------------------- /all_models/gptneox/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/gptneox/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /all_models/t5-encoder/fastertransformer/1/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /all_models/t5-encoder/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5-encoder/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /all_models/t5-encoder/tokenizer/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5-encoder/tokenizer/1/model.py -------------------------------------------------------------------------------- /all_models/t5-encoder/tokenizer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5-encoder/tokenizer/config.pbtxt -------------------------------------------------------------------------------- /all_models/t5/fastertransformer/1/1-gpu/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5/fastertransformer/1/1-gpu/config.ini -------------------------------------------------------------------------------- /all_models/t5/fastertransformer/1/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5/fastertransformer/1/config.ini -------------------------------------------------------------------------------- /all_models/t5/fastertransformer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/all_models/t5/fastertransformer/config.pbtxt -------------------------------------------------------------------------------- /cmake/Modules/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/cmake/Modules/FindNCCL.cmake -------------------------------------------------------------------------------- /cmake/TritonFasterTransformerBackendConfig.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/cmake/TritonFasterTransformerBackendConfig.cmake.in -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/create_dockerfile_and_build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docker/create_dockerfile_and_build.py -------------------------------------------------------------------------------- /docs/bert_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docs/bert_guide.md -------------------------------------------------------------------------------- /docs/gpt_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docs/gpt_guide.md -------------------------------------------------------------------------------- /docs/gptj_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docs/gptj_guide.md -------------------------------------------------------------------------------- /docs/gptneox_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docs/gptneox_guide.md -------------------------------------------------------------------------------- /docs/t5_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/docs/t5_guide.md -------------------------------------------------------------------------------- /images/multi_gpu_multi_node_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/images/multi_gpu_multi_node_workflow.png -------------------------------------------------------------------------------- /src/libfastertransformer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/src/libfastertransformer.cc -------------------------------------------------------------------------------- /src/libtriton_fastertransformer.ldscript: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/src/libtriton_fastertransformer.ldscript -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/benchmark.py -------------------------------------------------------------------------------- /tools/benchmark_single_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/benchmark_single_node.sh -------------------------------------------------------------------------------- /tools/bert/create_bert_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/bert/create_bert_config.sh -------------------------------------------------------------------------------- /tools/bert/identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/bert/identity_test.py -------------------------------------------------------------------------------- /tools/create_gpt_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/create_gpt_config.sh -------------------------------------------------------------------------------- /tools/create_gptj_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/create_gptj_config.sh -------------------------------------------------------------------------------- /tools/end_to_end_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/end_to_end_test.py -------------------------------------------------------------------------------- /tools/evaluate_lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/evaluate_lambada.py -------------------------------------------------------------------------------- /tools/gpt/create_gpt_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/create_gpt_config.sh -------------------------------------------------------------------------------- /tools/gpt/end_to_end_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/end_to_end_test.py -------------------------------------------------------------------------------- /tools/gpt/evaluate_lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/evaluate_lambada.py -------------------------------------------------------------------------------- /tools/gpt/gpt_prompt_learning_squad_task_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/gpt_prompt_learning_squad_task_eval.py -------------------------------------------------------------------------------- /tools/gpt/gpt_prompt_learning_squad_task_identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/gpt_prompt_learning_squad_task_identity_test.py -------------------------------------------------------------------------------- /tools/gpt/gptneox_jp_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/gptneox_jp_test.py -------------------------------------------------------------------------------- /tools/gpt/identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/identity_test.py -------------------------------------------------------------------------------- /tools/gpt/xglm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt/xglm_test.py -------------------------------------------------------------------------------- /tools/gpt_prompt_learning_squad_task_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt_prompt_learning_squad_task_eval.py -------------------------------------------------------------------------------- /tools/gpt_prompt_learning_squad_task_identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/gpt_prompt_learning_squad_task_identity_test.py -------------------------------------------------------------------------------- /tools/identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/identity_test.py -------------------------------------------------------------------------------- /tools/interactive_text_generation/create_interactive_gpt_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/interactive_text_generation/create_interactive_gpt_config.sh -------------------------------------------------------------------------------- /tools/interactive_text_generation/end_to_end_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/interactive_text_generation/end_to_end_test.py -------------------------------------------------------------------------------- /tools/interactive_text_generation/identity_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/interactive_text_generation/identity_test.py -------------------------------------------------------------------------------- /tools/interactive_text_generation/triton_out_baseline: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/interactive_text_generation/triton_out_baseline -------------------------------------------------------------------------------- /tools/issue_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/issue_request.py -------------------------------------------------------------------------------- /tools/kill_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/kill_server.sh -------------------------------------------------------------------------------- /tools/requests/sample_request.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request.json -------------------------------------------------------------------------------- /tools/requests/sample_request_ensemble.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_ensemble.json -------------------------------------------------------------------------------- /tools/requests/sample_request_runtime_top_k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_runtime_top_k.json -------------------------------------------------------------------------------- /tools/requests/sample_request_single.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_single.json -------------------------------------------------------------------------------- /tools/requests/sample_request_single_t5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_single_t5.json -------------------------------------------------------------------------------- /tools/requests/sample_request_single_t5_encoder.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_single_t5_encoder.json -------------------------------------------------------------------------------- /tools/requests/sample_request_single_t5_ia3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_single_t5_ia3.json -------------------------------------------------------------------------------- /tools/requests/sample_request_stream.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_stream.json -------------------------------------------------------------------------------- /tools/requests/sample_request_stream_t5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_stream_t5.json -------------------------------------------------------------------------------- /tools/requests/sample_request_tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/requests/sample_request_tokenizer.json -------------------------------------------------------------------------------- /tools/run_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/run_client.sh -------------------------------------------------------------------------------- /tools/run_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/run_server.sh -------------------------------------------------------------------------------- /tools/t5_utils/boolq_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/boolq_test.jsonl -------------------------------------------------------------------------------- /tools/t5_utils/boolq_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/boolq_test.py -------------------------------------------------------------------------------- /tools/t5_utils/boolq_test_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/boolq_test_hf.py -------------------------------------------------------------------------------- /tools/t5_utils/create_t5_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/create_t5_config.sh -------------------------------------------------------------------------------- /tools/t5_utils/create_t5_encoder_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/create_t5_encoder_config.sh -------------------------------------------------------------------------------- /tools/t5_utils/summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/summarization.py -------------------------------------------------------------------------------- /tools/t5_utils/t5_end_to_end_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/t5_end_to_end_test.py -------------------------------------------------------------------------------- /tools/t5_utils/t5_mnli_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/t5_mnli_task.py -------------------------------------------------------------------------------- /tools/t5_utils/t5_requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/t5_requirement.txt -------------------------------------------------------------------------------- /tools/t5_utils/t5_xnli_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/t5_xnli_task.py -------------------------------------------------------------------------------- /tools/t5_utils/test.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/test.de -------------------------------------------------------------------------------- /tools/t5_utils/test.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/t5_utils/test.en -------------------------------------------------------------------------------- /tools/util.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/util.sh -------------------------------------------------------------------------------- /tools/utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/utils.sh -------------------------------------------------------------------------------- /tools/utils/gpt_prompt_learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/utils/gpt_prompt_learning.py -------------------------------------------------------------------------------- /tools/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /tools/utils/recover_bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/HEAD/tools/utils/recover_bpe.py --------------------------------------------------------------------------------