├── .devcontainer ├── Dockerfile.trtllm └── devcontainer.json ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── feature-request.yml │ └── new-model-addition.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── push_docker_image.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .redocly.lint-ignore.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── Dockerfile.nix ├── Dockerfile.trtllm ├── Dockerfile_amd ├── Dockerfile_intel ├── LICENSE ├── Makefile ├── README.md ├── assets ├── architecture.png ├── benchmark.png └── tgi_grafana.json ├── backends ├── client │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── lib.rs │ │ ├── v2 │ │ ├── client.rs │ │ ├── mod.rs │ │ ├── pb │ │ │ ├── generate.v2.rs │ │ │ └── mod.rs │ │ └── sharded_client.rs │ │ └── v3 │ │ ├── client.rs │ │ ├── mod.rs │ │ ├── pb │ │ ├── generate.v3.rs │ │ └── mod.rs │ │ └── sharded_client.rs ├── grpc-metadata │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── trtllm │ ├── CMakeLists.txt │ ├── Cargo.toml │ ├── Dockerfile │ ├── README.md │ ├── build.rs │ ├── cmake │ │ ├── fmt.cmake │ │ ├── json.cmake │ │ ├── spdlog.cmake │ │ ├── trtllm.cmake │ │ └── utils │ │ │ └── detect_cuda_arch.cu │ ├── include │ │ ├── backend.h │ │ ├── ffi.h │ │ └── hardware.h │ ├── lib │ │ └── backend.cpp │ ├── scripts │ │ └── install_tensorrt.sh │ ├── src │ │ ├── backend.rs │ │ ├── errors.rs │ │ ├── ffi.cpp │ │ ├── lib.rs │ │ └── main.rs │ └── tests │ │ └── infer_test.cpp ├── v2 │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── backend.rs │ │ ├── client │ │ ├── grpc_client.rs │ │ ├── mod.rs │ │ └── sharded_client.rs │ │ ├── lib.rs │ │ ├── main.rs │ │ └── queue.rs └── v3 │ ├── Cargo.toml │ ├── benches │ └── prefix_cache.rs │ ├── build.rs │ └── src │ ├── backend.rs │ ├── block_allocator.rs │ ├── client │ ├── grpc_client.rs │ ├── mod.rs │ └── sharded_client.rs │ ├── lib.rs │ ├── main.rs │ ├── queue.rs │ └── radix.rs ├── benchmark ├── Cargo.toml ├── README.md └── src │ ├── app.rs │ ├── event.rs │ ├── generation.rs │ ├── lib.rs │ ├── main.rs │ ├── table.rs │ └── utils.rs ├── clients └── python │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── poetry.lock │ ├── pyproject.toml │ ├── tests │ ├── conftest.py │ ├── test_client.py │ ├── test_errors.py │ ├── test_inference_api.py │ └── test_types.py │ └── text_generation │ ├── __init__.py │ ├── client.py │ ├── errors.py │ ├── inference_api.py │ └── types.py ├── docs ├── README.md ├── index.html ├── openapi.json └── source │ ├── _toctree.yml │ ├── architecture.md │ ├── basic_tutorials │ ├── consuming_tgi.md │ ├── gated_model_access.md │ ├── monitoring.md │ ├── non_core_models.md │ ├── preparing_model.md │ ├── safety.md │ ├── train_medusa.md │ ├── using_cli.md │ ├── using_guidance.md │ └── visual_language_models.md │ ├── conceptual │ ├── external.md │ ├── flash_attention.md │ ├── guidance.md │ ├── lora.md │ ├── paged_attention.md │ ├── quantization.md │ ├── safetensors.md │ ├── speculation.md │ ├── streaming.md │ └── tensor_parallelism.md │ ├── index.md │ ├── installation.md │ ├── installation_amd.md │ ├── installation_gaudi.md │ ├── installation_inferentia.md │ ├── installation_intel.md │ ├── installation_nvidia.md │ ├── quicktour.md │ ├── reference │ ├── api_reference.md │ ├── launcher.md │ └── metrics.md │ ├── supported_models.md │ └── usage_statistics.md ├── examples ├── README.md ├── requirements.txt ├── run_generation.py └── tgi_client.py ├── flake.lock ├── flake.nix ├── integration-tests ├── conftest.py ├── images │ ├── chicken_on_money.png │ └── cow_beach.png ├── models │ ├── __snapshots__ │ │ ├── test_bloom_560m │ │ │ ├── test_bloom_560m.json │ │ │ ├── test_bloom_560m_all_params.json │ │ │ └── test_bloom_560m_load.json │ │ ├── test_bloom_560m_sharded │ │ │ ├── test_bloom_560m_sharded.json │ │ │ └── test_bloom_560m_sharded_load.json │ │ ├── test_chat_llama │ │ │ └── test_flash_llama_simple.json │ │ ├── test_completion_prompts │ │ │ ├── test_flash_llama_completion_many_prompts.json │ │ │ ├── test_flash_llama_completion_many_prompts_stream.json │ │ │ ├── test_flash_llama_completion_single_prompt.json │ │ │ └── test_flash_llama_completion_stream_usage.json │ │ ├── test_flash_awq │ │ │ ├── test_flash_llama_awq.json │ │ │ ├── test_flash_llama_awq_all_params.json │ │ │ └── test_flash_llama_awq_load.json │ │ ├── test_flash_awq_sharded │ │ │ ├── test_flash_llama_awq_load_sharded.json │ │ │ └── test_flash_llama_awq_sharded.json │ │ ├── test_flash_deepseek_v2 │ │ │ ├── test_flash_deepseek_v2.json │ │ │ ├── test_flash_deepseek_v2_all_params.json │ │ │ └── test_flash_deepseek_v2_load.json │ │ ├── test_flash_falcon │ │ │ ├── test_flash_falcon.json │ │ │ ├── test_flash_falcon_all_params.json │ │ │ └── test_flash_falcon_load.json │ │ ├── test_flash_gemma │ │ │ ├── test_flash_gemma.json │ │ │ ├── test_flash_gemma_all_params.json │ │ │ └── test_flash_gemma_load.json │ │ ├── test_flash_gemma2 │ │ │ ├── test_flash_gemma2.json │ │ │ └── test_flash_gemma2_load.json │ │ ├── test_flash_gemma_gptq │ │ │ ├── test_flash_gemma_gptq.json │ │ │ ├── test_flash_gemma_gptq_all_params.json │ │ │ └── test_flash_gemma_gptq_load.json │ │ ├── test_flash_gpt2 │ │ │ ├── test_flash_gpt2.json │ │ │ └── test_flash_gpt2_load.json │ │ ├── test_flash_grammar_llama │ │ │ ├── test_flash_llama_grammar.json │ │ │ ├── test_flash_llama_grammar_json.json │ │ │ ├── test_flash_llama_grammar_load.json │ │ │ ├── test_flash_llama_grammar_regex.json │ │ │ └── test_flash_llama_grammar_single_load_instance.json │ │ ├── test_flash_llama │ │ │ ├── test_flash_llama.json │ │ │ ├── test_flash_llama_all_params.json │ │ │ └── test_flash_llama_load.json │ │ ├── test_flash_llama_exl2 │ │ │ ├── test_flash_llama_exl2.json │ │ │ ├── test_flash_llama_exl2_all_params.json │ │ │ └── test_flash_llama_exl2_load.json │ │ ├── test_flash_llama_fp8 │ │ │ ├── test_flash_llama_fp8.json │ │ │ ├── test_flash_llama_fp8_all_params.json │ │ │ └── test_flash_llama_fp8_load.json │ │ ├── test_flash_llama_gptq │ │ │ ├── test_flash_llama_gptq.json │ │ │ ├── test_flash_llama_gptq_all_params.json │ │ │ └── test_flash_llama_gptq_load.json │ │ ├── test_flash_llama_marlin │ │ │ ├── test_flash_llama_marlin.json │ │ │ ├── test_flash_llama_marlin_all_params.json │ │ │ └── test_flash_llama_marlin_load.json │ │ ├── test_flash_llama_marlin_24 │ │ │ ├── test_flash_llama_marlin.json │ │ │ ├── test_flash_llama_marlin24_all_params.json │ │ │ └── test_flash_llama_marlin24_load.json │ │ ├── test_flash_llama_prefix │ │ │ └── test_flash_llama_load.json │ │ ├── test_flash_llama_prefix_flashdecoding │ │ │ └── test_flash_llama_flashdecoding.json │ │ ├── test_flash_medusa │ │ │ ├── test_flash_medusa_all_params.json │ │ │ ├── test_flash_medusa_load.json │ │ │ └── test_flash_medusa_simple.json │ │ ├── test_flash_mistral │ │ │ ├── test_flash_mistral.json │ │ │ ├── test_flash_mistral_all_params.json │ │ │ └── test_flash_mistral_load.json │ │ ├── test_flash_mixtral │ │ │ ├── test_flash_mixtral.json │ │ │ ├── test_flash_mixtral_all_params.json │ │ │ └── test_flash_mixtral_load.json │ │ ├── test_flash_mixtral_gptq │ │ │ ├── test_flash_mixtral_gptq.json │ │ │ ├── test_flash_mixtral_gptq_all_params.json │ │ │ └── test_flash_mixtral_gptq_load.json │ │ ├── test_flash_neox │ │ │ ├── test_flash_neox.json │ │ │ └── test_flash_neox_load.json │ │ ├── test_flash_neox_sharded │ │ │ ├── test_flash_neox.json │ │ │ └── test_flash_neox_load.json │ │ ├── test_flash_pali_gemma │ │ │ ├── test_flash_pali_gemma.json │ │ │ └── test_flash_pali_gemma_two_images.json │ │ ├── test_flash_phi │ │ │ ├── test_flash_phi.json │ │ │ ├── test_flash_phi_all_params.json │ │ │ └── test_flash_phi_load.json │ │ ├── test_flash_phi35_moe │ │ │ ├── test_flash_phi35_moe.json │ │ │ ├── test_flash_phi35_moe_all_params.json │ │ │ └── test_flash_phi35_moe_load.json │ │ ├── test_flash_qwen2 │ │ │ ├── test_flash_qwen2.json │ │ │ ├── test_flash_qwen2_all_params.json │ │ │ └── test_flash_qwen2_load.json │ │ ├── test_flash_santacoder │ │ │ ├── test_flash_santacoder.json │ │ │ └── test_flash_santacoder_load.json │ │ ├── test_flash_starcoder │ │ │ ├── test_flash_starcoder.json │ │ │ ├── test_flash_starcoder_default_params.json │ │ │ └── test_flash_starcoder_load.json │ │ ├── test_flash_starcoder2 │ │ │ ├── test_flash_starcoder2.json │ │ │ ├── test_flash_starcoder2_default_params.json │ │ │ └── test_flash_starcoder2_load.json │ │ ├── test_flash_starcoder_gptq │ │ │ ├── test_flash_starcoder_gptq.json │ │ │ ├── test_flash_starcoder_gptq_default_params.json │ │ │ └── test_flash_starcoder_gptq_load.json │ │ ├── test_grammar_llama │ │ │ └── test_non_flash_llama_grammar_json.json │ │ ├── test_grammar_response_format_llama │ │ │ └── test_grammar_response_format_llama_json.json │ │ ├── test_idefics │ │ │ ├── test_idefics.json │ │ │ ├── test_idefics_load.json │ │ │ └── test_idefics_two_images.json │ │ ├── test_idefics2 │ │ │ ├── test_flash_idefics2_next_all_params.json │ │ │ ├── test_flash_idefics2_next_load.json │ │ │ ├── test_flash_idefics2_next_simple.json │ │ │ └── test_flash_idefics2_two_images.json │ │ ├── test_llava_next │ │ │ ├── test_flash_llava_next_all_params.json │ │ │ ├── test_flash_llava_next_load.json │ │ │ └── test_flash_llava_next_simple.json │ │ ├── test_lora_mistral │ │ │ ├── test_lora_mistral_with_customer_support_adapter.json │ │ │ ├── test_lora_mistral_with_dbpedia_adapter.json │ │ │ ├── test_lora_mistral_without_adapter.json │ │ │ └── test_lora_mistral_without_customer_support_adapter.json │ │ ├── test_mamba │ │ │ ├── test_mamba.json │ │ │ ├── test_mamba_all_params.json │ │ │ └── test_mamba_load.json │ │ ├── test_mllama │ │ │ ├── test_mllama_load.json │ │ │ └── test_mllama_simpl.json │ │ ├── test_mpt │ │ │ ├── test_mpt.json │ │ │ └── test_mpt_load.json │ │ ├── test_mt0_base │ │ │ ├── test_mt0_base.json │ │ │ ├── test_mt0_base_all_params.json │ │ │ └── test_mt0_base_load.json │ │ ├── test_neox │ │ │ ├── test_neox.json │ │ │ └── test_neox_load.json │ │ ├── test_neox_sharded │ │ │ ├── test_neox.json │ │ │ └── test_neox_load.json │ │ ├── test_server_gptq_quantized │ │ │ ├── test_server_gptq_quantized.json │ │ │ ├── test_server_gptq_quantized_all_params.json │ │ │ └── test_server_gptq_quantized_load.json │ │ ├── test_t5_sharded │ │ │ ├── test_t5_sharded.json │ │ │ └── test_t5_sharded_load.json │ │ └── test_tools_llama │ │ │ ├── test_flash_llama_grammar_tools.json │ │ │ ├── test_flash_llama_grammar_tools_auto.json │ │ │ ├── test_flash_llama_grammar_tools_choice.json │ │ │ ├── test_flash_llama_grammar_tools_insufficient_information.json │ │ │ └── test_flash_llama_grammar_tools_stream.json │ ├── test_bloom_560m.py │ ├── test_bloom_560m_sharded.py │ ├── test_chat_llama.py │ ├── test_completion_prompts.py │ ├── test_flash_awq.py │ ├── test_flash_awq_sharded.py │ ├── test_flash_deepseek_v2.py │ ├── test_flash_falcon.py │ ├── test_flash_gemma.py │ ├── test_flash_gemma2.py │ ├── test_flash_gemma_gptq.py │ ├── test_flash_gpt2.py │ ├── test_flash_grammar_llama.py │ ├── test_flash_llama.py │ ├── test_flash_llama_exl2.py │ ├── test_flash_llama_fp8.py │ ├── test_flash_llama_gptq.py │ ├── test_flash_llama_marlin.py │ ├── test_flash_llama_marlin_24.py │ ├── test_flash_llama_prefix.py │ ├── test_flash_llama_prefix_flashdecoding.py │ ├── test_flash_medusa.py │ ├── test_flash_mistral.py │ ├── test_flash_mixtral.py │ ├── test_flash_mixtral_gptq.py │ ├── test_flash_neox.py │ ├── test_flash_neox_sharded.py │ ├── test_flash_pali_gemma.py │ ├── test_flash_phi.py │ ├── test_flash_phi35_moe.py │ ├── test_flash_qwen2.py │ ├── test_flash_santacoder.py │ ├── test_flash_starcoder.py │ ├── test_flash_starcoder2.py │ ├── test_flash_starcoder_gptq.py │ ├── test_grammar_llama.py │ ├── test_grammar_response_format_llama.py │ ├── test_idefics.py │ ├── test_idefics2.py │ ├── test_llava_next.py │ ├── test_lora_mistral.py │ ├── test_mamba.py │ ├── test_mllama.py │ ├── test_mpt.py │ ├── test_mt0_base.py │ ├── test_neox.py │ ├── test_neox_sharded.py │ ├── test_opt.py │ ├── test_t5_sharded.py │ └── test_tools_llama.py ├── poetry.lock ├── pyproject.toml ├── pytest.ini └── requirements.txt ├── launcher ├── Cargo.toml ├── build.rs └── src │ ├── env_runtime.rs │ ├── gpu.rs │ └── main.rs ├── load_tests ├── Makefile ├── common.js ├── filter.py └── orca.py ├── nix ├── client.nix ├── crate-overrides.nix ├── docker.nix ├── impure-shell.nix └── server.nix ├── proto ├── generate.proto └── v3 │ └── generate.proto ├── router ├── Cargo.toml ├── README.md ├── build.rs └── src │ ├── config.rs │ ├── infer │ ├── chat_template.rs │ ├── mod.rs │ └── tool_grammar.rs │ ├── kserve.rs │ ├── lib.rs │ ├── logging.rs │ ├── main.rs.back │ ├── server.rs │ ├── usage_stats.rs │ ├── validation.rs │ └── vertex.rs ├── rust-toolchain.toml ├── sagemaker-entrypoint.sh ├── server ├── .gitignore ├── Makefile ├── Makefile-awq ├── Makefile-eetq ├── Makefile-exllamav2 ├── Makefile-fbgemm ├── Makefile-flash-att ├── Makefile-flash-att-v2 ├── Makefile-flashinfer ├── Makefile-lorax-punica ├── Makefile-selective-scan ├── Makefile-vllm ├── README.md ├── custom_kernels │ ├── custom_kernels │ │ ├── fused_attention_cuda.cu │ │ └── fused_bloom_attention_cuda.cu │ └── setup.py ├── dill-0.3.7-patch.sh ├── dill-0.3.8-patch.sh ├── exllama_kernels │ ├── exllama_kernels │ │ ├── cu_compat.cuh │ │ ├── cuda_buffers.cu │ │ ├── cuda_buffers.cuh │ │ ├── cuda_func │ │ │ ├── column_remap.cu │ │ │ ├── column_remap.cuh │ │ │ ├── q4_matmul.cu │ │ │ ├── q4_matmul.cuh │ │ │ ├── q4_matrix.cu │ │ │ └── q4_matrix.cuh │ │ ├── exllama_ext.cpp │ │ ├── hip_compat.cuh │ │ ├── matrix.cuh │ │ ├── tuning.h │ │ └── util.cuh │ └── setup.py ├── exllamav2_kernels │ ├── exllamav2_kernels │ │ ├── config.h │ │ ├── cpp │ │ │ └── util.h │ │ ├── cuda │ │ │ ├── compat.cuh │ │ │ ├── matrix_view.cuh │ │ │ ├── q_gemm.cu │ │ │ ├── q_gemm.cuh │ │ │ ├── q_gemm_kernel.cuh │ │ │ ├── q_gemm_kernel_gptq.cuh │ │ │ ├── q_matrix.cu │ │ │ ├── q_matrix.cuh │ │ │ ├── quant │ │ │ │ ├── qdq_2.cuh │ │ │ │ ├── qdq_3.cuh │ │ │ │ ├── qdq_4.cuh │ │ │ │ ├── qdq_5.cuh │ │ │ │ ├── qdq_6.cuh │ │ │ │ ├── qdq_8.cuh │ │ │ │ └── qdq_util.cuh │ │ │ └── util.cuh │ │ └── ext.cpp │ └── setup.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── requirements_cuda.txt ├── requirements_intel.txt ├── requirements_rocm.txt ├── tests │ ├── conftest.py │ ├── models │ │ ├── test_bloom.py │ │ ├── test_causal_lm.py │ │ ├── test_model.py │ │ ├── test_santacoder.py │ │ └── test_seq2seq_lm.py │ └── utils │ │ ├── test_adapter.py │ │ ├── test_convert.py │ │ ├── test_hub.py │ │ ├── test_layers.py │ │ ├── test_tokens.py │ │ ├── test_watermark.py │ │ └── test_weights.py └── text_generation_server │ ├── __init__.py │ ├── adapters │ ├── __init__.py │ ├── config.py │ ├── lora.py │ └── weights.py │ ├── cache.py │ ├── cli.py │ ├── habana_quantization_env.py │ ├── interceptor.py │ ├── layers │ ├── __init__.py │ ├── attention │ │ ├── __init__.py │ │ ├── common.py │ │ ├── cuda.py │ │ ├── flash_attn_triton.py │ │ ├── flashinfer.py │ │ ├── ipex.py │ │ └── rocm.py │ ├── awq │ │ ├── conversion_utils.py │ │ └── quantize │ │ │ └── qmodule.py │ ├── bnb.py │ ├── conv.py │ ├── eetq.py │ ├── exl2.py │ ├── fp8.py │ ├── gptq │ │ ├── __init__.py │ │ ├── custom_autotune.py │ │ ├── exllama.py │ │ ├── exllamav2.py │ │ ├── quant_linear.py │ │ ├── quantize.py │ │ └── utils.py │ ├── layernorm.py │ ├── linear.py │ ├── lora.py │ ├── marlin │ │ ├── __init__.py │ │ ├── fp8.py │ │ ├── gptq.py │ │ ├── marlin.py │ │ └── util.py │ ├── medusa.py │ ├── mlp.py │ ├── moe │ │ ├── __init__.py │ │ ├── fused_moe_rocm.py │ │ ├── gptq_marlin.py │ │ └── unquantized.py │ ├── rotary.py │ ├── speculative.py │ └── tensor_parallel.py │ ├── models │ ├── __init__.py │ ├── bloom.py │ ├── causal_lm.py │ ├── custom_modeling │ │ ├── __init__.py │ │ ├── bloom_modeling.py │ │ ├── clip.py │ │ ├── flash_cohere_modeling.py │ │ ├── flash_dbrx_modeling.py │ │ ├── flash_deepseek_v2_modeling.py │ │ ├── flash_gemma2_modeling.py │ │ ├── flash_gemma_modeling.py │ │ ├── flash_gpt2_modeling.py │ │ ├── flash_gptj_modeling.py │ │ ├── flash_llama_modeling.py │ │ ├── flash_mistral_modeling.py │ │ ├── flash_mixtral_modeling.py │ │ ├── flash_neox_modeling.py │ │ ├── flash_pali_gemma_modeling.py │ │ ├── flash_phi_modeling.py │ │ ├── flash_phi_moe_modeling.py │ │ ├── flash_qwen2_modeling.py │ │ ├── flash_rw_modeling.py │ │ ├── flash_santacoder_modeling.py │ │ ├── flash_starcoder2_modeling.py │ │ ├── idefics2.py │ │ ├── idefics_config.py │ │ ├── idefics_image_processing.py │ │ ├── idefics_modeling.py │ │ ├── idefics_perceiver.py │ │ ├── idefics_processing.py │ │ ├── idefics_vision.py │ │ ├── llava_next.py │ │ ├── mamba_modeling.py │ │ ├── mllama.py │ │ ├── mpt_modeling.py │ │ ├── neox_modeling.py │ │ ├── opt_modeling.py │ │ ├── phi_modeling.py │ │ ├── siglip.py │ │ ├── t5_modeling.py │ │ └── vlm.py │ ├── flash_causal_lm.py │ ├── galactica.py │ ├── globals.py │ ├── idefics_causal_lm.py │ ├── mamba.py │ ├── mllama_causal_lm.py │ ├── model.py │ ├── pali_gemma.py │ ├── seq2seq_lm.py │ ├── starcoder.py │ ├── types.py │ └── vlm_causal_lm.py │ ├── pb │ └── .gitignore │ ├── server.py │ ├── tgi_service.py │ ├── tracing.py │ └── utils │ ├── __init__.py │ ├── adapter.py │ ├── chunks.py │ ├── convert.py │ ├── debug.py │ ├── dist.py │ ├── hub.py │ ├── import_utils.py │ ├── log.py │ ├── logits_process.py │ ├── merges │ ├── strategies.py │ └── utils.py │ ├── peft.py │ ├── quantization.py │ ├── segments.py │ ├── sgmv.py │ ├── speculate.py │ ├── tokens.py │ ├── version.py │ ├── watermark.py │ └── weights.py ├── tgi-entrypoint.sh └── update_doc.py /.devcontainer/Dockerfile.trtllm: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.github/ISSUE_TEMPLATE/bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.github/ISSUE_TEMPLATE/feature-request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.github/ISSUE_TEMPLATE/new-model-addition.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/workflows/push_docker_image.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.github/workflows/push_docker_image.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.redocly.lint-ignore.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/.redocly.lint-ignore.yaml -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Cargo.lock -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Cargo.toml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Dockerfile.nix -------------------------------------------------------------------------------- /Dockerfile.trtllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Dockerfile.trtllm -------------------------------------------------------------------------------- /Dockerfile_amd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Dockerfile_amd -------------------------------------------------------------------------------- /Dockerfile_intel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Dockerfile_intel -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/README.md -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/assets/architecture.png -------------------------------------------------------------------------------- /assets/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/assets/benchmark.png -------------------------------------------------------------------------------- /assets/tgi_grafana.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/assets/tgi_grafana.json -------------------------------------------------------------------------------- /backends/client/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/Cargo.toml -------------------------------------------------------------------------------- /backends/client/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/build.rs -------------------------------------------------------------------------------- /backends/client/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/lib.rs -------------------------------------------------------------------------------- /backends/client/src/v2/client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v2/client.rs -------------------------------------------------------------------------------- /backends/client/src/v2/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v2/mod.rs -------------------------------------------------------------------------------- /backends/client/src/v2/pb/generate.v2.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v2/pb/generate.v2.rs -------------------------------------------------------------------------------- /backends/client/src/v2/pb/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v2/pb/mod.rs -------------------------------------------------------------------------------- /backends/client/src/v2/sharded_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v2/sharded_client.rs -------------------------------------------------------------------------------- /backends/client/src/v3/client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v3/client.rs -------------------------------------------------------------------------------- /backends/client/src/v3/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v3/mod.rs -------------------------------------------------------------------------------- /backends/client/src/v3/pb/generate.v3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v3/pb/generate.v3.rs -------------------------------------------------------------------------------- /backends/client/src/v3/pb/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v3/pb/mod.rs -------------------------------------------------------------------------------- /backends/client/src/v3/sharded_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/client/src/v3/sharded_client.rs -------------------------------------------------------------------------------- /backends/grpc-metadata/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/grpc-metadata/Cargo.toml -------------------------------------------------------------------------------- /backends/grpc-metadata/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/grpc-metadata/src/lib.rs -------------------------------------------------------------------------------- /backends/trtllm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/CMakeLists.txt -------------------------------------------------------------------------------- /backends/trtllm/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/Cargo.toml -------------------------------------------------------------------------------- /backends/trtllm/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/Dockerfile -------------------------------------------------------------------------------- /backends/trtllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/README.md -------------------------------------------------------------------------------- /backends/trtllm/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/build.rs -------------------------------------------------------------------------------- /backends/trtllm/cmake/fmt.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/cmake/fmt.cmake -------------------------------------------------------------------------------- /backends/trtllm/cmake/json.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/cmake/json.cmake -------------------------------------------------------------------------------- /backends/trtllm/cmake/spdlog.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/cmake/spdlog.cmake -------------------------------------------------------------------------------- /backends/trtllm/cmake/trtllm.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/cmake/trtllm.cmake -------------------------------------------------------------------------------- /backends/trtllm/cmake/utils/detect_cuda_arch.cu: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/trtllm/include/backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/include/backend.h -------------------------------------------------------------------------------- /backends/trtllm/include/ffi.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/include/ffi.h -------------------------------------------------------------------------------- /backends/trtllm/include/hardware.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/include/hardware.h -------------------------------------------------------------------------------- /backends/trtllm/lib/backend.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/lib/backend.cpp -------------------------------------------------------------------------------- /backends/trtllm/scripts/install_tensorrt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/scripts/install_tensorrt.sh -------------------------------------------------------------------------------- /backends/trtllm/src/backend.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/src/backend.rs -------------------------------------------------------------------------------- /backends/trtllm/src/errors.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/src/errors.rs -------------------------------------------------------------------------------- /backends/trtllm/src/ffi.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/src/ffi.cpp -------------------------------------------------------------------------------- /backends/trtllm/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/src/lib.rs -------------------------------------------------------------------------------- /backends/trtllm/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/src/main.rs -------------------------------------------------------------------------------- /backends/trtllm/tests/infer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/trtllm/tests/infer_test.cpp -------------------------------------------------------------------------------- /backends/v2/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/Cargo.toml -------------------------------------------------------------------------------- /backends/v2/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/build.rs -------------------------------------------------------------------------------- /backends/v2/src/backend.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/backend.rs -------------------------------------------------------------------------------- /backends/v2/src/client/grpc_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/client/grpc_client.rs -------------------------------------------------------------------------------- /backends/v2/src/client/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/client/mod.rs -------------------------------------------------------------------------------- /backends/v2/src/client/sharded_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/client/sharded_client.rs -------------------------------------------------------------------------------- /backends/v2/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/lib.rs -------------------------------------------------------------------------------- /backends/v2/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/main.rs -------------------------------------------------------------------------------- /backends/v2/src/queue.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v2/src/queue.rs -------------------------------------------------------------------------------- /backends/v3/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/Cargo.toml -------------------------------------------------------------------------------- /backends/v3/benches/prefix_cache.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/benches/prefix_cache.rs -------------------------------------------------------------------------------- /backends/v3/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/build.rs -------------------------------------------------------------------------------- /backends/v3/src/backend.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/backend.rs -------------------------------------------------------------------------------- /backends/v3/src/block_allocator.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/block_allocator.rs -------------------------------------------------------------------------------- /backends/v3/src/client/grpc_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/client/grpc_client.rs -------------------------------------------------------------------------------- /backends/v3/src/client/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/client/mod.rs -------------------------------------------------------------------------------- /backends/v3/src/client/sharded_client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/client/sharded_client.rs -------------------------------------------------------------------------------- /backends/v3/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/lib.rs -------------------------------------------------------------------------------- /backends/v3/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/main.rs -------------------------------------------------------------------------------- /backends/v3/src/queue.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/queue.rs -------------------------------------------------------------------------------- /backends/v3/src/radix.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/backends/v3/src/radix.rs -------------------------------------------------------------------------------- /benchmark/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/Cargo.toml -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/README.md -------------------------------------------------------------------------------- /benchmark/src/app.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/app.rs -------------------------------------------------------------------------------- /benchmark/src/event.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/event.rs -------------------------------------------------------------------------------- /benchmark/src/generation.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/generation.rs -------------------------------------------------------------------------------- /benchmark/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/lib.rs -------------------------------------------------------------------------------- /benchmark/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/main.rs -------------------------------------------------------------------------------- /benchmark/src/table.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/table.rs -------------------------------------------------------------------------------- /benchmark/src/utils.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/benchmark/src/utils.rs -------------------------------------------------------------------------------- /clients/python/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/.gitignore -------------------------------------------------------------------------------- /clients/python/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/Makefile -------------------------------------------------------------------------------- /clients/python/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/README.md -------------------------------------------------------------------------------- /clients/python/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/poetry.lock -------------------------------------------------------------------------------- /clients/python/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/pyproject.toml -------------------------------------------------------------------------------- /clients/python/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/tests/conftest.py -------------------------------------------------------------------------------- /clients/python/tests/test_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/tests/test_client.py -------------------------------------------------------------------------------- /clients/python/tests/test_errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/tests/test_errors.py -------------------------------------------------------------------------------- /clients/python/tests/test_inference_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/tests/test_inference_api.py -------------------------------------------------------------------------------- /clients/python/tests/test_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/tests/test_types.py -------------------------------------------------------------------------------- /clients/python/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/text_generation/__init__.py -------------------------------------------------------------------------------- /clients/python/text_generation/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/text_generation/client.py -------------------------------------------------------------------------------- /clients/python/text_generation/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/text_generation/errors.py -------------------------------------------------------------------------------- /clients/python/text_generation/inference_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/text_generation/inference_api.py -------------------------------------------------------------------------------- /clients/python/text_generation/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/clients/python/text_generation/types.py -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/openapi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/openapi.json -------------------------------------------------------------------------------- /docs/source/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/_toctree.yml -------------------------------------------------------------------------------- /docs/source/architecture.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/architecture.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/consuming_tgi.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/consuming_tgi.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/gated_model_access.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/gated_model_access.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/monitoring.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/monitoring.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/non_core_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/non_core_models.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/preparing_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/preparing_model.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/safety.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/safety.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/train_medusa.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/train_medusa.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/using_cli.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/using_cli.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/using_guidance.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/using_guidance.md -------------------------------------------------------------------------------- /docs/source/basic_tutorials/visual_language_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/basic_tutorials/visual_language_models.md -------------------------------------------------------------------------------- /docs/source/conceptual/external.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/external.md -------------------------------------------------------------------------------- /docs/source/conceptual/flash_attention.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/flash_attention.md -------------------------------------------------------------------------------- /docs/source/conceptual/guidance.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/guidance.md -------------------------------------------------------------------------------- /docs/source/conceptual/lora.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/lora.md -------------------------------------------------------------------------------- /docs/source/conceptual/paged_attention.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/paged_attention.md -------------------------------------------------------------------------------- /docs/source/conceptual/quantization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/quantization.md -------------------------------------------------------------------------------- /docs/source/conceptual/safetensors.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/safetensors.md -------------------------------------------------------------------------------- /docs/source/conceptual/speculation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/speculation.md -------------------------------------------------------------------------------- /docs/source/conceptual/streaming.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/streaming.md -------------------------------------------------------------------------------- /docs/source/conceptual/tensor_parallelism.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/conceptual/tensor_parallelism.md -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/index.md -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation.md -------------------------------------------------------------------------------- /docs/source/installation_amd.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation_amd.md -------------------------------------------------------------------------------- /docs/source/installation_gaudi.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation_gaudi.md -------------------------------------------------------------------------------- /docs/source/installation_inferentia.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation_inferentia.md -------------------------------------------------------------------------------- /docs/source/installation_intel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation_intel.md -------------------------------------------------------------------------------- /docs/source/installation_nvidia.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/installation_nvidia.md -------------------------------------------------------------------------------- /docs/source/quicktour.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/quicktour.md -------------------------------------------------------------------------------- /docs/source/reference/api_reference.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/reference/api_reference.md -------------------------------------------------------------------------------- /docs/source/reference/launcher.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/reference/launcher.md -------------------------------------------------------------------------------- /docs/source/reference/metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/reference/metrics.md -------------------------------------------------------------------------------- /docs/source/supported_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/supported_models.md -------------------------------------------------------------------------------- /docs/source/usage_statistics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/docs/source/usage_statistics.md -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/examples/requirements.txt -------------------------------------------------------------------------------- /examples/run_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/examples/run_generation.py -------------------------------------------------------------------------------- /examples/tgi_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/examples/tgi_client.py -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/flake.lock -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/flake.nix -------------------------------------------------------------------------------- /integration-tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/conftest.py -------------------------------------------------------------------------------- /integration-tests/images/chicken_on_money.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/images/chicken_on_money.png -------------------------------------------------------------------------------- /integration-tests/images/cow_beach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/images/cow_beach.png -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics/test_idefics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics/test_idefics.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_adapter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_adapter.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mamba/test_mamba.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mamba/test_mamba.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mllama/test_mllama_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mllama/test_mllama_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mllama/test_mllama_simpl.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mllama/test_mllama_simpl.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mpt/test_mpt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mpt/test_mpt.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_neox/test_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_neox/test_neox.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_neox/test_neox_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_neox/test_neox_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_neox_sharded/test_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_neox_sharded/test_neox.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_all_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_all_params.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json -------------------------------------------------------------------------------- /integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json -------------------------------------------------------------------------------- /integration-tests/models/test_bloom_560m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_bloom_560m.py -------------------------------------------------------------------------------- /integration-tests/models/test_bloom_560m_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_bloom_560m_sharded.py -------------------------------------------------------------------------------- /integration-tests/models/test_chat_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_chat_llama.py -------------------------------------------------------------------------------- /integration-tests/models/test_completion_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_completion_prompts.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_awq.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_awq_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_awq_sharded.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_deepseek_v2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_falcon.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_gemma.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_gemma2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_gemma_gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_gemma_gptq.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_gpt2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_grammar_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_grammar_llama.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_exl2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_exl2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_fp8.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_gptq.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_marlin.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_marlin_24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_marlin_24.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_prefix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_prefix.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_llama_prefix_flashdecoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_llama_prefix_flashdecoding.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_medusa.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_mistral.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_mixtral.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_mixtral_gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_mixtral_gptq.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_neox.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_neox_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_neox_sharded.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_pali_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_pali_gemma.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_phi.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_phi35_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_phi35_moe.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_qwen2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_santacoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_santacoder.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_starcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_starcoder.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_starcoder2.py -------------------------------------------------------------------------------- /integration-tests/models/test_flash_starcoder_gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_flash_starcoder_gptq.py -------------------------------------------------------------------------------- /integration-tests/models/test_grammar_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_grammar_llama.py -------------------------------------------------------------------------------- /integration-tests/models/test_grammar_response_format_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_grammar_response_format_llama.py -------------------------------------------------------------------------------- /integration-tests/models/test_idefics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_idefics.py -------------------------------------------------------------------------------- /integration-tests/models/test_idefics2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_idefics2.py -------------------------------------------------------------------------------- /integration-tests/models/test_llava_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_llava_next.py -------------------------------------------------------------------------------- /integration-tests/models/test_lora_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_lora_mistral.py -------------------------------------------------------------------------------- /integration-tests/models/test_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_mamba.py -------------------------------------------------------------------------------- /integration-tests/models/test_mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_mllama.py -------------------------------------------------------------------------------- /integration-tests/models/test_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_mpt.py -------------------------------------------------------------------------------- /integration-tests/models/test_mt0_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_mt0_base.py -------------------------------------------------------------------------------- /integration-tests/models/test_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_neox.py -------------------------------------------------------------------------------- /integration-tests/models/test_neox_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_neox_sharded.py -------------------------------------------------------------------------------- /integration-tests/models/test_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_opt.py -------------------------------------------------------------------------------- /integration-tests/models/test_t5_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_t5_sharded.py -------------------------------------------------------------------------------- /integration-tests/models/test_tools_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/models/test_tools_llama.py -------------------------------------------------------------------------------- /integration-tests/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/poetry.lock -------------------------------------------------------------------------------- /integration-tests/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/pyproject.toml -------------------------------------------------------------------------------- /integration-tests/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/pytest.ini -------------------------------------------------------------------------------- /integration-tests/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/integration-tests/requirements.txt -------------------------------------------------------------------------------- /launcher/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/launcher/Cargo.toml -------------------------------------------------------------------------------- /launcher/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/launcher/build.rs -------------------------------------------------------------------------------- /launcher/src/env_runtime.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/launcher/src/env_runtime.rs -------------------------------------------------------------------------------- /launcher/src/gpu.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/launcher/src/gpu.rs -------------------------------------------------------------------------------- /launcher/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/launcher/src/main.rs -------------------------------------------------------------------------------- /load_tests/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/load_tests/Makefile -------------------------------------------------------------------------------- /load_tests/common.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/load_tests/common.js -------------------------------------------------------------------------------- /load_tests/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/load_tests/filter.py -------------------------------------------------------------------------------- /load_tests/orca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/load_tests/orca.py -------------------------------------------------------------------------------- /nix/client.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/nix/client.nix -------------------------------------------------------------------------------- /nix/crate-overrides.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/nix/crate-overrides.nix -------------------------------------------------------------------------------- /nix/docker.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/nix/docker.nix -------------------------------------------------------------------------------- /nix/impure-shell.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/nix/impure-shell.nix -------------------------------------------------------------------------------- /nix/server.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/nix/server.nix -------------------------------------------------------------------------------- /proto/generate.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/proto/generate.proto -------------------------------------------------------------------------------- /proto/v3/generate.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/proto/v3/generate.proto -------------------------------------------------------------------------------- /router/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/Cargo.toml -------------------------------------------------------------------------------- /router/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/README.md -------------------------------------------------------------------------------- /router/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/build.rs -------------------------------------------------------------------------------- /router/src/config.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/config.rs -------------------------------------------------------------------------------- /router/src/infer/chat_template.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/infer/chat_template.rs -------------------------------------------------------------------------------- /router/src/infer/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/infer/mod.rs -------------------------------------------------------------------------------- /router/src/infer/tool_grammar.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/infer/tool_grammar.rs -------------------------------------------------------------------------------- /router/src/kserve.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/kserve.rs -------------------------------------------------------------------------------- /router/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/lib.rs -------------------------------------------------------------------------------- /router/src/logging.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/logging.rs -------------------------------------------------------------------------------- /router/src/main.rs.back: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/main.rs.back -------------------------------------------------------------------------------- /router/src/server.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/server.rs -------------------------------------------------------------------------------- /router/src/usage_stats.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/usage_stats.rs -------------------------------------------------------------------------------- /router/src/validation.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/validation.rs -------------------------------------------------------------------------------- /router/src/vertex.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/router/src/vertex.rs -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/rust-toolchain.toml -------------------------------------------------------------------------------- /sagemaker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/sagemaker-entrypoint.sh -------------------------------------------------------------------------------- /server/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/.gitignore -------------------------------------------------------------------------------- /server/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile -------------------------------------------------------------------------------- /server/Makefile-awq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-awq -------------------------------------------------------------------------------- /server/Makefile-eetq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-eetq -------------------------------------------------------------------------------- /server/Makefile-exllamav2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-exllamav2 -------------------------------------------------------------------------------- /server/Makefile-fbgemm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-fbgemm -------------------------------------------------------------------------------- /server/Makefile-flash-att: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-flash-att -------------------------------------------------------------------------------- /server/Makefile-flash-att-v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-flash-att-v2 -------------------------------------------------------------------------------- /server/Makefile-flashinfer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-flashinfer -------------------------------------------------------------------------------- /server/Makefile-lorax-punica: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-lorax-punica -------------------------------------------------------------------------------- /server/Makefile-selective-scan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-selective-scan -------------------------------------------------------------------------------- /server/Makefile-vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/Makefile-vllm -------------------------------------------------------------------------------- /server/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/README.md -------------------------------------------------------------------------------- /server/custom_kernels/custom_kernels/fused_attention_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/custom_kernels/custom_kernels/fused_attention_cuda.cu -------------------------------------------------------------------------------- /server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu -------------------------------------------------------------------------------- /server/custom_kernels/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/custom_kernels/setup.py -------------------------------------------------------------------------------- /server/dill-0.3.7-patch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/dill-0.3.7-patch.sh -------------------------------------------------------------------------------- /server/dill-0.3.8-patch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/dill-0.3.8-patch.sh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cu_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cu_compat.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_buffers.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_buffers.cu -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_buffers.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_buffers.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/exllama_ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/exllama_ext.cpp -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/hip_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/hip_compat.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/matrix.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/tuning.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/tuning.h -------------------------------------------------------------------------------- /server/exllama_kernels/exllama_kernels/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/exllama_kernels/util.cuh -------------------------------------------------------------------------------- /server/exllama_kernels/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllama_kernels/setup.py -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/config.h -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cpp/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cpp/util.h -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/compat.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_3.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh -------------------------------------------------------------------------------- /server/exllamav2_kernels/exllamav2_kernels/ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/exllamav2_kernels/ext.cpp -------------------------------------------------------------------------------- /server/exllamav2_kernels/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/exllamav2_kernels/setup.py -------------------------------------------------------------------------------- /server/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/poetry.lock -------------------------------------------------------------------------------- /server/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/pyproject.toml -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/requirements.txt -------------------------------------------------------------------------------- /server/requirements_cuda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/requirements_cuda.txt -------------------------------------------------------------------------------- /server/requirements_intel.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/requirements_intel.txt -------------------------------------------------------------------------------- /server/requirements_rocm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/requirements_rocm.txt -------------------------------------------------------------------------------- /server/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/conftest.py -------------------------------------------------------------------------------- /server/tests/models/test_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/models/test_bloom.py -------------------------------------------------------------------------------- /server/tests/models/test_causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/models/test_causal_lm.py -------------------------------------------------------------------------------- /server/tests/models/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/models/test_model.py -------------------------------------------------------------------------------- /server/tests/models/test_santacoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/models/test_santacoder.py -------------------------------------------------------------------------------- /server/tests/models/test_seq2seq_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/models/test_seq2seq_lm.py -------------------------------------------------------------------------------- /server/tests/utils/test_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_adapter.py -------------------------------------------------------------------------------- /server/tests/utils/test_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_convert.py -------------------------------------------------------------------------------- /server/tests/utils/test_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_hub.py -------------------------------------------------------------------------------- /server/tests/utils/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_layers.py -------------------------------------------------------------------------------- /server/tests/utils/test_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_tokens.py -------------------------------------------------------------------------------- /server/tests/utils/test_watermark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_watermark.py -------------------------------------------------------------------------------- /server/tests/utils/test_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/tests/utils/test_weights.py -------------------------------------------------------------------------------- /server/text_generation_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/text_generation_server/adapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/adapters/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/adapters/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/adapters/config.py -------------------------------------------------------------------------------- /server/text_generation_server/adapters/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/adapters/lora.py -------------------------------------------------------------------------------- /server/text_generation_server/adapters/weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/adapters/weights.py -------------------------------------------------------------------------------- /server/text_generation_server/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/cache.py -------------------------------------------------------------------------------- /server/text_generation_server/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/cli.py -------------------------------------------------------------------------------- /server/text_generation_server/habana_quantization_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/habana_quantization_env.py -------------------------------------------------------------------------------- /server/text_generation_server/interceptor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/interceptor.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/common.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/cuda.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/flash_attn_triton.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/flashinfer.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/ipex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/ipex.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/attention/rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/attention/rocm.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/awq/conversion_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/awq/conversion_utils.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/awq/quantize/qmodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/awq/quantize/qmodule.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/bnb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/bnb.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/conv.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/eetq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/eetq.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/exl2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/exl2.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/fp8.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/custom_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/custom_autotune.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/exllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/exllama.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/exllamav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/exllamav2.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/quant_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/quant_linear.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/quantize.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/gptq/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/gptq/utils.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/layernorm.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/linear.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/lora.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/marlin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/marlin/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/marlin/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/marlin/fp8.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/marlin/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/marlin/gptq.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/marlin/marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/marlin/marlin.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/marlin/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/marlin/util.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/medusa.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/mlp.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/moe/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/moe/fused_moe_rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/moe/fused_moe_rocm.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/moe/gptq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/moe/gptq_marlin.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/moe/unquantized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/moe/unquantized.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/rotary.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/speculative.py -------------------------------------------------------------------------------- /server/text_generation_server/layers/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/layers/tensor_parallel.py -------------------------------------------------------------------------------- /server/text_generation_server/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/bloom.py -------------------------------------------------------------------------------- /server/text_generation_server/models/causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/causal_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/bloom_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/bloom_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/clip.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_llama_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_neox_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_phi_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_phi_moe_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_phi_moe_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_rw_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics2.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_config.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_image_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_image_processing.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_perceiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_perceiver.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_processing.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/idefics_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/idefics_vision.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/llava_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/llava_next.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/mamba_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/mamba_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/mllama.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/mpt_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/mpt_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/neox_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/neox_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/opt_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/opt_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/phi_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/phi_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/siglip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/siglip.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/t5_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/t5_modeling.py -------------------------------------------------------------------------------- /server/text_generation_server/models/custom_modeling/vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/custom_modeling/vlm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/flash_causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/flash_causal_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/galactica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/galactica.py -------------------------------------------------------------------------------- /server/text_generation_server/models/globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/globals.py -------------------------------------------------------------------------------- /server/text_generation_server/models/idefics_causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/idefics_causal_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/mamba.py -------------------------------------------------------------------------------- /server/text_generation_server/models/mllama_causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/mllama_causal_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/model.py -------------------------------------------------------------------------------- /server/text_generation_server/models/pali_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/pali_gemma.py -------------------------------------------------------------------------------- /server/text_generation_server/models/seq2seq_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/seq2seq_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/models/starcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/starcoder.py -------------------------------------------------------------------------------- /server/text_generation_server/models/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/types.py -------------------------------------------------------------------------------- /server/text_generation_server/models/vlm_causal_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/models/vlm_causal_lm.py -------------------------------------------------------------------------------- /server/text_generation_server/pb/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/pb/.gitignore -------------------------------------------------------------------------------- /server/text_generation_server/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/server.py -------------------------------------------------------------------------------- /server/text_generation_server/tgi_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/tgi_service.py -------------------------------------------------------------------------------- /server/text_generation_server/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/tracing.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/__init__.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/adapter.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/chunks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/chunks.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/convert.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/debug.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/dist.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/hub.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/import_utils.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/log.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/logits_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/logits_process.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/merges/strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/merges/strategies.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/merges/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/merges/utils.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/peft.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/quantization.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/segments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/segments.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/sgmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/sgmv.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/speculate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/speculate.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/tokens.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/version.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/watermark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/watermark.py -------------------------------------------------------------------------------- /server/text_generation_server/utils/weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/server/text_generation_server/utils/weights.py -------------------------------------------------------------------------------- /tgi-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/tgi-entrypoint.sh -------------------------------------------------------------------------------- /update_doc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/tgi-gaudi/HEAD/update_doc.py --------------------------------------------------------------------------------