├── .cargo └── config.toml ├── .dockerignore ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── feature-request.yml │ └── new-model-addition.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build.yaml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── integration-test.yaml │ ├── liniting.yaml │ ├── matrix.json │ ├── test.yaml │ ├── trufflehog.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── Dockerfile-cuda ├── Dockerfile-cuda-all ├── Dockerfile-intel ├── LICENSE ├── Makefile ├── README.md ├── assets ├── bs1-lat.png ├── bs1-tp.png ├── bs32-lat.png └── bs32-tp.png ├── backends ├── Cargo.toml ├── candle │ ├── Cargo.toml │ ├── build.rs │ ├── src │ │ ├── alibi.rs │ │ ├── compute_cap.rs │ │ ├── flash_attn.rs │ │ ├── layers │ │ │ ├── cublaslt.rs │ │ │ ├── layer_norm.rs │ │ │ ├── linear.rs │ │ │ ├── mod.rs │ │ │ ├── rms_norm.rs │ │ │ └── rotary.rs │ │ ├── lib.rs │ │ └── models │ │ │ ├── bert.rs │ │ │ ├── dense.rs │ │ │ ├── distilbert.rs │ │ │ ├── flash_bert.rs │ │ │ ├── flash_distilbert.rs │ │ │ ├── flash_gte.rs │ │ │ ├── flash_jina.rs │ │ │ ├── flash_jina_code.rs │ │ │ ├── flash_mistral.rs │ │ │ ├── flash_modernbert.rs │ │ │ ├── flash_nomic.rs │ │ │ ├── flash_qwen2.rs │ │ │ ├── flash_qwen3.rs │ │ │ ├── gemma3.rs │ │ │ ├── gte.rs │ │ │ ├── jina.rs │ │ │ ├── jina_code.rs │ │ │ ├── mistral.rs │ │ │ ├── mod.rs │ │ │ ├── modernbert.rs │ │ │ ├── mpnet.rs │ │ │ ├── nomic.rs │ │ │ ├── qwen2.rs │ │ │ └── qwen3.rs │ └── tests │ │ ├── common.rs │ │ ├── snapshots │ │ ├── test_bert__bert_batch.snap │ │ ├── test_bert__bert_batch_pooled.snap │ │ ├── test_bert__bert_batch_raw.snap │ │ ├── test_bert__bert_classification_single.snap │ │ ├── test_bert__bert_single.snap │ │ ├── test_bert__bert_single_pooled.snap │ │ ├── test_bert__bert_single_raw.snap │ │ ├── test_bert__emotions_batch.snap │ │ ├── test_bert__emotions_single.snap │ │ ├── test_dense__stella_en_400m_v5_default_dense_batch.snap │ │ ├── test_dense__stella_en_400m_v5_default_dense_single.snap │ │ ├── test_dense__stella_en_400m_v5_dense_768_batch.snap │ │ ├── test_dense__stella_en_400m_v5_dense_768_single.snap │ │ ├── test_flash_bert__bert_classification_single.snap │ │ ├── test_flash_bert__emotions_batch.snap │ │ ├── test_flash_bert__emotions_single.snap │ │ ├── test_flash_bert__mini_batch.snap │ │ ├── test_flash_bert__mini_batch_pooled.snap │ │ ├── test_flash_bert__mini_batch_raw.snap │ │ ├── test_flash_bert__mini_single.snap │ │ ├── test_flash_bert__mini_single_pooled.snap │ │ ├── test_flash_bert__mini_single_raw.snap │ │ ├── test_flash_gte__gte_batch.snap │ │ ├── test_flash_gte__gte_classification_single.snap │ │ ├── test_flash_gte__gte_single.snap │ │ ├── test_flash_jina__jina_batch.snap │ │ ├── test_flash_jina__jina_single.snap │ │ ├── test_flash_jina_code__jina_code_batch.snap │ │ ├── test_flash_jina_code__jina_code_single.snap │ │ ├── test_flash_mistral__mistral_batch.snap │ │ ├── test_flash_mistral__mistral_single.snap │ │ ├── test_flash_nomic__nomic_batch.snap │ │ ├── test_flash_nomic__nomic_moe_batch.snap │ │ ├── test_flash_nomic__nomic_moe_single.snap │ │ ├── test_flash_nomic__nomic_single.snap │ │ ├── test_flash_qwen2__qwen2_batch.snap │ │ ├── test_flash_qwen2__qwen2_single.snap │ │ ├── test_flash_qwen3__qwen3_batch.snap │ │ ├── test_flash_qwen3__qwen3_single.snap │ │ ├── test_gemma3__gemma3_cpu_batch.snap │ │ ├── test_gemma3__gemma3_cpu_single.snap │ │ ├── test_gte__alibaba_gte_batch.snap │ │ ├── test_gte__alibaba_gte_single.snap │ │ ├── test_gte__alibaba_new_gte_batch.snap │ │ ├── test_gte__alibaba_new_gte_single.snap │ │ ├── test_gte__gte_classification_single.snap │ │ ├── test_gte__snowflake_gte_batch.snap │ │ ├── test_gte__snowflake_gte_single.snap │ │ ├── test_jina__jina_batch.snap │ │ ├── test_jina__jina_single.snap │ │ ├── test_jina__jinabert_reranker_single.snap │ │ ├── test_jina_code__jina_code_batch.snap │ │ ├── test_jina_code__jina_code_single.snap │ │ ├── test_modernbert__modernbert_batch.snap │ │ ├── test_modernbert__modernbert_batch_flash.snap │ │ ├── test_modernbert__modernbert_batch_pooled.snap │ │ ├── test_modernbert__modernbert_batch_pooled_flash.snap │ │ ├── test_modernbert__modernbert_batch_raw.snap │ │ ├── test_modernbert__modernbert_batch_raw_flash.snap │ │ ├── test_modernbert__modernbert_classification_mean_pooling.snap │ │ ├── test_modernbert__modernbert_classification_single.snap │ │ ├── test_modernbert__modernbert_single.snap │ │ ├── test_modernbert__modernbert_single_flash.snap │ │ ├── test_modernbert__modernbert_single_pooled.snap │ │ ├── test_modernbert__modernbert_single_pooled_flash.snap │ │ ├── test_modernbert__modernbert_single_raw.snap │ │ ├── test_modernbert__modernbert_single_raw_flash.snap │ │ ├── test_mpnet__mpnet_batch.snap │ │ ├── test_mpnet__mpnet_batch_pooled.snap │ │ ├── test_mpnet__mpnet_batch_raw.snap │ │ ├── test_mpnet__mpnet_single.snap │ │ ├── test_mpnet__mpnet_single_pooled.snap │ │ ├── test_mpnet__mpnet_single_raw.snap │ │ ├── test_nomic__nomic_batch.snap │ │ ├── test_nomic__nomic_moe_batch.snap │ │ ├── test_nomic__nomic_moe_single.snap │ │ ├── test_nomic__nomic_single.snap │ │ ├── test_qwen3__qwen3_cpu_batch.snap │ │ └── test_qwen3__qwen3_cpu_single.snap │ │ ├── test_bert.rs │ │ ├── test_dense.rs │ │ ├── test_flash_bert.rs │ │ ├── test_flash_gte.rs │ │ ├── test_flash_jina.rs │ │ ├── test_flash_jina_code.rs │ │ ├── test_flash_mistral.rs │ │ ├── test_flash_nomic.rs │ │ ├── test_flash_qwen2.rs │ │ ├── test_flash_qwen3.rs │ │ ├── test_gemma3.rs │ │ ├── test_gte.rs │ │ ├── test_jina.rs │ │ ├── test_jina_code.rs │ │ ├── test_modernbert.rs │ │ ├── test_mpnet.rs │ │ ├── test_nomic.rs │ │ └── test_qwen3.rs ├── core │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── grpc-client │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── client.rs │ │ ├── lib.rs │ │ └── pb │ │ └── .gitignore ├── grpc-metadata │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── ort │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── proto │ └── embed.proto ├── python │ ├── Cargo.toml │ ├── server │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── Makefile-flash-att │ │ ├── Makefile-flash-att-v2 │ │ ├── README.md │ │ ├── poetry.lock │ │ ├── pyproject.toml │ │ ├── requirements-hpu.txt │ │ ├── requirements-intel.txt │ │ ├── requirements.txt │ │ └── text_embeddings_server │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── classification_model.py │ │ │ ├── default_model.py │ │ │ ├── flash_bert.py │ │ │ ├── flash_mistral.py │ │ │ ├── flash_qwen3.py │ │ │ ├── jinaBert_model.py │ │ │ ├── masked_model.py │ │ │ ├── model.py │ │ │ ├── pooling.py │ │ │ └── types.py │ │ │ ├── pb │ │ │ └── .gitignore │ │ │ ├── server.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── device.py │ │ │ ├── flash_attn.py │ │ │ ├── interceptor.py │ │ │ └── tracing.py │ └── src │ │ ├── lib.rs │ │ ├── logging.rs │ │ └── management.rs └── src │ ├── dtype.rs │ └── lib.rs ├── core ├── Cargo.toml └── src │ ├── download.rs │ ├── infer.rs │ ├── lib.rs │ ├── queue.rs │ └── tokenization.rs ├── cuda-all-entrypoint.sh ├── docs ├── index.html ├── openapi.json └── source │ └── en │ ├── _toctree.yml │ ├── cli_arguments.md │ ├── custom_container.md │ ├── examples.md │ ├── index.md │ ├── intel_container.md │ ├── local_cpu.md │ ├── local_gpu.md │ ├── local_metal.md │ ├── private_models.md │ ├── quick_tour.md │ ├── supported_models.md │ └── tei_cloud_run.md ├── flake.lock ├── flake.nix ├── integration_tests ├── README.md ├── gaudi │ ├── conftest.py │ └── test_embed.py ├── pyproject.toml └── uv.lock ├── load_tests ├── load.js ├── load_grpc.js └── load_grpc_stream.js ├── proto └── tei.proto ├── router ├── Cargo.toml ├── build.rs ├── src │ ├── grpc │ │ ├── mod.rs │ │ ├── pb │ │ │ └── .gitignore │ │ └── server.rs │ ├── http │ │ ├── mod.rs │ │ ├── server.rs │ │ └── types.rs │ ├── lib.rs │ ├── logging.rs │ ├── main.rs │ ├── prometheus.rs │ └── shutdown.rs └── tests │ ├── common.rs │ ├── snapshots │ ├── test_http_embed__mrl_embeddings_batch.snap │ └── test_http_embed__mrl_embeddings_single.snap │ ├── test_http_embed.rs │ ├── test_http_predict.rs │ └── test_http_rerank.rs ├── rust-toolchain.toml ├── sagemaker-entrypoint-cuda-all.sh └── sagemaker-entrypoint.sh /.cargo/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.cargo/config.toml -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/ISSUE_TEMPLATE/bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/ISSUE_TEMPLATE/feature-request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/ISSUE_TEMPLATE/new-model-addition.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/build.yaml -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/build_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/build_pr_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/integration-test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/integration-test.yaml -------------------------------------------------------------------------------- /.github/workflows/liniting.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/liniting.yaml -------------------------------------------------------------------------------- /.github/workflows/matrix.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/matrix.json -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/test.yaml -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/trufflehog.yml -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.github/workflows/upload_pr_documentation.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | data/ 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Cargo.lock -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Cargo.toml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile-cuda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Dockerfile-cuda -------------------------------------------------------------------------------- /Dockerfile-cuda-all: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Dockerfile-cuda-all -------------------------------------------------------------------------------- /Dockerfile-intel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Dockerfile-intel -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/README.md -------------------------------------------------------------------------------- /assets/bs1-lat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/assets/bs1-lat.png -------------------------------------------------------------------------------- /assets/bs1-tp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/assets/bs1-tp.png -------------------------------------------------------------------------------- /assets/bs32-lat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/assets/bs32-lat.png -------------------------------------------------------------------------------- /assets/bs32-tp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/assets/bs32-tp.png -------------------------------------------------------------------------------- /backends/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/Cargo.toml -------------------------------------------------------------------------------- /backends/candle/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/Cargo.toml -------------------------------------------------------------------------------- /backends/candle/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/build.rs -------------------------------------------------------------------------------- /backends/candle/src/alibi.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/alibi.rs -------------------------------------------------------------------------------- /backends/candle/src/compute_cap.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/compute_cap.rs -------------------------------------------------------------------------------- /backends/candle/src/flash_attn.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/flash_attn.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/cublaslt.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/cublaslt.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/layer_norm.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/layer_norm.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/linear.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/linear.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/mod.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/rms_norm.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/rms_norm.rs -------------------------------------------------------------------------------- /backends/candle/src/layers/rotary.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/layers/rotary.rs -------------------------------------------------------------------------------- /backends/candle/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/lib.rs -------------------------------------------------------------------------------- /backends/candle/src/models/bert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/bert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/dense.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/dense.rs -------------------------------------------------------------------------------- /backends/candle/src/models/distilbert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/distilbert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_bert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_bert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_distilbert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_distilbert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_gte.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_gte.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_jina.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_jina.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_jina_code.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_jina_code.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_mistral.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_mistral.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_modernbert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_modernbert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_nomic.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_nomic.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_qwen2.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_qwen2.rs -------------------------------------------------------------------------------- /backends/candle/src/models/flash_qwen3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/flash_qwen3.rs -------------------------------------------------------------------------------- /backends/candle/src/models/gemma3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/gemma3.rs -------------------------------------------------------------------------------- /backends/candle/src/models/gte.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/gte.rs -------------------------------------------------------------------------------- /backends/candle/src/models/jina.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/jina.rs -------------------------------------------------------------------------------- /backends/candle/src/models/jina_code.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/jina_code.rs -------------------------------------------------------------------------------- /backends/candle/src/models/mistral.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/mistral.rs -------------------------------------------------------------------------------- /backends/candle/src/models/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/mod.rs -------------------------------------------------------------------------------- /backends/candle/src/models/modernbert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/modernbert.rs -------------------------------------------------------------------------------- /backends/candle/src/models/mpnet.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/mpnet.rs -------------------------------------------------------------------------------- /backends/candle/src/models/nomic.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/nomic.rs -------------------------------------------------------------------------------- /backends/candle/src/models/qwen2.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/qwen2.rs -------------------------------------------------------------------------------- /backends/candle/src/models/qwen3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/src/models/qwen3.rs -------------------------------------------------------------------------------- /backends/candle/tests/common.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/common.rs -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_batch_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_batch_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_batch_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_batch_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_classification_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_classification_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_single_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_single_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__bert_single_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__bert_single_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__emotions_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__emotions_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_bert__emotions_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_bert__emotions_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_default_dense_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_default_dense_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_default_dense_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_default_dense_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_dense_768_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_dense_768_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_dense_768_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_dense__stella_en_400m_v5_dense_768_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__bert_classification_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__bert_classification_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__emotions_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__emotions_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__emotions_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__emotions_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_batch_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_batch_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_batch_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_batch_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_single_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_single_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_bert__mini_single_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_bert__mini_single_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_gte__gte_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_gte__gte_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_gte__gte_classification_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_gte__gte_classification_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_gte__gte_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_gte__gte_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_jina__jina_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_jina__jina_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_jina__jina_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_jina__jina_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_jina_code__jina_code_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_jina_code__jina_code_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_jina_code__jina_code_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_jina_code__jina_code_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_mistral__mistral_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_mistral__mistral_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_mistral__mistral_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_mistral__mistral_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_nomic__nomic_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_nomic__nomic_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_nomic__nomic_moe_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_nomic__nomic_moe_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_nomic__nomic_moe_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_nomic__nomic_moe_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_nomic__nomic_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_nomic__nomic_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_qwen2__qwen2_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_qwen2__qwen2_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_qwen2__qwen2_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_qwen2__qwen2_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_qwen3__qwen3_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_qwen3__qwen3_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_flash_qwen3__qwen3_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_flash_qwen3__qwen3_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gemma3__gemma3_cpu_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gemma3__gemma3_cpu_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gemma3__gemma3_cpu_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gemma3__gemma3_cpu_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__alibaba_gte_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__alibaba_gte_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__alibaba_gte_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__alibaba_gte_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__alibaba_new_gte_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__alibaba_new_gte_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__alibaba_new_gte_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__alibaba_new_gte_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__gte_classification_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__gte_classification_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_jina__jina_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_jina__jina_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_jina__jina_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_jina__jina_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_jina__jinabert_reranker_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_jina__jinabert_reranker_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_jina_code__jina_code_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_jina_code__jina_code_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_jina_code__jina_code_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_jina_code__jina_code_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch_pooled_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch_pooled_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_batch_raw_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_batch_raw_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_classification_mean_pooling.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_classification_mean_pooling.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_classification_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_classification_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single_pooled_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single_pooled_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_modernbert__modernbert_single_raw_flash.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_modernbert__modernbert_single_raw_flash.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_batch_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_batch_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_batch_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_batch_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_single_pooled.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_single_pooled.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_mpnet__mpnet_single_raw.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_mpnet__mpnet_single_raw.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_nomic__nomic_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_nomic__nomic_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_nomic__nomic_moe_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_nomic__nomic_moe_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_nomic__nomic_moe_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_nomic__nomic_moe_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_nomic__nomic_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_nomic__nomic_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_qwen3__qwen3_cpu_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_qwen3__qwen3_cpu_batch.snap -------------------------------------------------------------------------------- /backends/candle/tests/snapshots/test_qwen3__qwen3_cpu_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/snapshots/test_qwen3__qwen3_cpu_single.snap -------------------------------------------------------------------------------- /backends/candle/tests/test_bert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_bert.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_dense.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_dense.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_bert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_bert.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_gte.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_gte.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_jina.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_jina.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_jina_code.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_jina_code.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_mistral.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_mistral.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_nomic.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_nomic.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_qwen2.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_qwen2.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_flash_qwen3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_flash_qwen3.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_gemma3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_gemma3.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_gte.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_gte.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_jina.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_jina.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_jina_code.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_jina_code.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_modernbert.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_modernbert.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_mpnet.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_mpnet.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_nomic.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_nomic.rs -------------------------------------------------------------------------------- /backends/candle/tests/test_qwen3.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/candle/tests/test_qwen3.rs -------------------------------------------------------------------------------- /backends/core/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/core/Cargo.toml -------------------------------------------------------------------------------- /backends/core/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/core/src/lib.rs -------------------------------------------------------------------------------- /backends/grpc-client/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-client/Cargo.toml -------------------------------------------------------------------------------- /backends/grpc-client/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-client/build.rs -------------------------------------------------------------------------------- /backends/grpc-client/src/client.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-client/src/client.rs -------------------------------------------------------------------------------- /backends/grpc-client/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-client/src/lib.rs -------------------------------------------------------------------------------- /backends/grpc-client/src/pb/.gitignore: -------------------------------------------------------------------------------- 1 | *.rs 2 | -------------------------------------------------------------------------------- /backends/grpc-metadata/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-metadata/Cargo.toml -------------------------------------------------------------------------------- /backends/grpc-metadata/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/grpc-metadata/src/lib.rs -------------------------------------------------------------------------------- /backends/ort/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/ort/Cargo.toml -------------------------------------------------------------------------------- /backends/ort/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/ort/src/lib.rs -------------------------------------------------------------------------------- /backends/proto/embed.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/proto/embed.proto -------------------------------------------------------------------------------- /backends/python/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/Cargo.toml -------------------------------------------------------------------------------- /backends/python/server/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/.gitignore -------------------------------------------------------------------------------- /backends/python/server/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/Makefile -------------------------------------------------------------------------------- /backends/python/server/Makefile-flash-att: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/Makefile-flash-att -------------------------------------------------------------------------------- /backends/python/server/Makefile-flash-att-v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/Makefile-flash-att-v2 -------------------------------------------------------------------------------- /backends/python/server/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/README.md -------------------------------------------------------------------------------- /backends/python/server/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/poetry.lock -------------------------------------------------------------------------------- /backends/python/server/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/pyproject.toml -------------------------------------------------------------------------------- /backends/python/server/requirements-hpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/requirements-hpu.txt -------------------------------------------------------------------------------- /backends/python/server/requirements-intel.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/requirements-intel.txt -------------------------------------------------------------------------------- /backends/python/server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/requirements.txt -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/cli.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/__init__.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/classification_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/classification_model.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/default_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/default_model.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/flash_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/flash_bert.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/flash_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/flash_mistral.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/flash_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/flash_qwen3.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/jinaBert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/jinaBert_model.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/masked_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/masked_model.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/model.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/pooling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/pooling.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/models/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/models/types.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/pb/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/pb/.gitignore -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/server.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/utils/device.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/utils/flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/utils/flash_attn.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/utils/interceptor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/utils/interceptor.py -------------------------------------------------------------------------------- /backends/python/server/text_embeddings_server/utils/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/server/text_embeddings_server/utils/tracing.py -------------------------------------------------------------------------------- /backends/python/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/src/lib.rs -------------------------------------------------------------------------------- /backends/python/src/logging.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/src/logging.rs -------------------------------------------------------------------------------- /backends/python/src/management.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/python/src/management.rs -------------------------------------------------------------------------------- /backends/src/dtype.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/src/dtype.rs -------------------------------------------------------------------------------- /backends/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/backends/src/lib.rs -------------------------------------------------------------------------------- /core/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/Cargo.toml -------------------------------------------------------------------------------- /core/src/download.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/src/download.rs -------------------------------------------------------------------------------- /core/src/infer.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/src/infer.rs -------------------------------------------------------------------------------- /core/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/src/lib.rs -------------------------------------------------------------------------------- /core/src/queue.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/src/queue.rs -------------------------------------------------------------------------------- /core/src/tokenization.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/core/src/tokenization.rs -------------------------------------------------------------------------------- /cuda-all-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/cuda-all-entrypoint.sh -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/openapi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/openapi.json -------------------------------------------------------------------------------- /docs/source/en/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/_toctree.yml -------------------------------------------------------------------------------- /docs/source/en/cli_arguments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/cli_arguments.md -------------------------------------------------------------------------------- /docs/source/en/custom_container.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/custom_container.md -------------------------------------------------------------------------------- /docs/source/en/examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/examples.md -------------------------------------------------------------------------------- /docs/source/en/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/index.md -------------------------------------------------------------------------------- /docs/source/en/intel_container.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/intel_container.md -------------------------------------------------------------------------------- /docs/source/en/local_cpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/local_cpu.md -------------------------------------------------------------------------------- /docs/source/en/local_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/local_gpu.md -------------------------------------------------------------------------------- /docs/source/en/local_metal.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/local_metal.md -------------------------------------------------------------------------------- /docs/source/en/private_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/private_models.md -------------------------------------------------------------------------------- /docs/source/en/quick_tour.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/quick_tour.md -------------------------------------------------------------------------------- /docs/source/en/supported_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/supported_models.md -------------------------------------------------------------------------------- /docs/source/en/tei_cloud_run.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/docs/source/en/tei_cloud_run.md -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/flake.lock -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/flake.nix -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/integration_tests/README.md -------------------------------------------------------------------------------- /integration_tests/gaudi/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/integration_tests/gaudi/conftest.py -------------------------------------------------------------------------------- /integration_tests/gaudi/test_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/integration_tests/gaudi/test_embed.py -------------------------------------------------------------------------------- /integration_tests/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/integration_tests/pyproject.toml -------------------------------------------------------------------------------- /integration_tests/uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/integration_tests/uv.lock -------------------------------------------------------------------------------- /load_tests/load.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/load_tests/load.js -------------------------------------------------------------------------------- /load_tests/load_grpc.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/load_tests/load_grpc.js -------------------------------------------------------------------------------- /load_tests/load_grpc_stream.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/load_tests/load_grpc_stream.js -------------------------------------------------------------------------------- /proto/tei.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/proto/tei.proto -------------------------------------------------------------------------------- /router/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/Cargo.toml -------------------------------------------------------------------------------- /router/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/build.rs -------------------------------------------------------------------------------- /router/src/grpc/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/grpc/mod.rs -------------------------------------------------------------------------------- /router/src/grpc/pb/.gitignore: -------------------------------------------------------------------------------- 1 | *.rs 2 | -------------------------------------------------------------------------------- /router/src/grpc/server.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/grpc/server.rs -------------------------------------------------------------------------------- /router/src/http/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/http/mod.rs -------------------------------------------------------------------------------- /router/src/http/server.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/http/server.rs -------------------------------------------------------------------------------- /router/src/http/types.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/http/types.rs -------------------------------------------------------------------------------- /router/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/lib.rs -------------------------------------------------------------------------------- /router/src/logging.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/logging.rs -------------------------------------------------------------------------------- /router/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/main.rs -------------------------------------------------------------------------------- /router/src/prometheus.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/prometheus.rs -------------------------------------------------------------------------------- /router/src/shutdown.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/src/shutdown.rs -------------------------------------------------------------------------------- /router/tests/common.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/common.rs -------------------------------------------------------------------------------- /router/tests/snapshots/test_http_embed__mrl_embeddings_batch.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/snapshots/test_http_embed__mrl_embeddings_batch.snap -------------------------------------------------------------------------------- /router/tests/snapshots/test_http_embed__mrl_embeddings_single.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/snapshots/test_http_embed__mrl_embeddings_single.snap -------------------------------------------------------------------------------- /router/tests/test_http_embed.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/test_http_embed.rs -------------------------------------------------------------------------------- /router/tests/test_http_predict.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/test_http_predict.rs -------------------------------------------------------------------------------- /router/tests/test_http_rerank.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/router/tests/test_http_rerank.rs -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/rust-toolchain.toml -------------------------------------------------------------------------------- /sagemaker-entrypoint-cuda-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/sagemaker-entrypoint-cuda-all.sh -------------------------------------------------------------------------------- /sagemaker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/text-embeddings-inference/HEAD/sagemaker-entrypoint.sh --------------------------------------------------------------------------------