├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE ├── Makefile ├── README.md ├── assets └── images │ ├── logo_v2.png │ ├── model2vec_logo.png │ ├── speed_vs_mteb_score_v3.png │ ├── training_speed_vs_score.png │ └── tutorial_ezlo.png ├── docs └── README.md ├── model2vec ├── __init__.py ├── distill │ ├── __init__.py │ ├── distillation.py │ ├── inference.py │ └── utils.py ├── hf_utils.py ├── inference │ ├── README.md │ ├── __init__.py │ └── model.py ├── model.py ├── modelcards │ ├── classifier_template.md │ └── model_card_template.md ├── py.typed ├── quantization.py ├── tokenizer │ ├── __init__.py │ ├── datamodels.py │ ├── model.py │ ├── normalizer.py │ ├── pretokenizer.py │ └── tokenizer.py ├── train │ ├── README.md │ ├── __init__.py │ ├── base.py │ └── classifier.py ├── utils.py ├── version.py └── vocabulary_quantization.py ├── pyproject.toml ├── results ├── README.md └── make_speed_vs_mteb_plot.py ├── scripts └── export_to_onnx.py ├── tests ├── __init__.py ├── conftest.py ├── data │ └── test_tokenizer │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json ├── test_distillation.py ├── test_inference.py ├── test_model.py ├── test_quantization.py ├── test_tokenizer.py ├── test_trainable.py └── test_utils.py ├── tutorials ├── README.md ├── recipe_search.ipynb ├── semantic_chunking.ipynb └── train_classifier.ipynb └── uv.lock /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/.github/workflows/ci.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/README.md -------------------------------------------------------------------------------- /assets/images/logo_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/assets/images/logo_v2.png -------------------------------------------------------------------------------- /assets/images/model2vec_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/assets/images/model2vec_logo.png -------------------------------------------------------------------------------- /assets/images/speed_vs_mteb_score_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/assets/images/speed_vs_mteb_score_v3.png -------------------------------------------------------------------------------- /assets/images/training_speed_vs_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/assets/images/training_speed_vs_score.png -------------------------------------------------------------------------------- /assets/images/tutorial_ezlo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/assets/images/tutorial_ezlo.png -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/docs/README.md -------------------------------------------------------------------------------- /model2vec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/__init__.py -------------------------------------------------------------------------------- /model2vec/distill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/distill/__init__.py -------------------------------------------------------------------------------- /model2vec/distill/distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/distill/distillation.py -------------------------------------------------------------------------------- /model2vec/distill/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/distill/inference.py -------------------------------------------------------------------------------- /model2vec/distill/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/distill/utils.py -------------------------------------------------------------------------------- /model2vec/hf_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/hf_utils.py -------------------------------------------------------------------------------- /model2vec/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/inference/README.md -------------------------------------------------------------------------------- /model2vec/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/inference/__init__.py -------------------------------------------------------------------------------- /model2vec/inference/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/inference/model.py -------------------------------------------------------------------------------- /model2vec/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/model.py -------------------------------------------------------------------------------- /model2vec/modelcards/classifier_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/modelcards/classifier_template.md -------------------------------------------------------------------------------- /model2vec/modelcards/model_card_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/modelcards/model_card_template.md -------------------------------------------------------------------------------- /model2vec/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model2vec/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/quantization.py -------------------------------------------------------------------------------- /model2vec/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/__init__.py -------------------------------------------------------------------------------- /model2vec/tokenizer/datamodels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/datamodels.py -------------------------------------------------------------------------------- /model2vec/tokenizer/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/model.py -------------------------------------------------------------------------------- /model2vec/tokenizer/normalizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/normalizer.py -------------------------------------------------------------------------------- /model2vec/tokenizer/pretokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/pretokenizer.py -------------------------------------------------------------------------------- /model2vec/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /model2vec/train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/train/README.md -------------------------------------------------------------------------------- /model2vec/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/train/__init__.py -------------------------------------------------------------------------------- /model2vec/train/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/train/base.py -------------------------------------------------------------------------------- /model2vec/train/classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/train/classifier.py -------------------------------------------------------------------------------- /model2vec/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/utils.py -------------------------------------------------------------------------------- /model2vec/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/version.py -------------------------------------------------------------------------------- /model2vec/vocabulary_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/model2vec/vocabulary_quantization.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/pyproject.toml -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/results/README.md -------------------------------------------------------------------------------- /results/make_speed_vs_mteb_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/results/make_speed_vs_mteb_plot.py -------------------------------------------------------------------------------- /scripts/export_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/scripts/export_to_onnx.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/data/test_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/data/test_tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /tests/data/test_tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/data/test_tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/data/test_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/data/test_tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /tests/test_distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_distillation.py -------------------------------------------------------------------------------- /tests/test_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_inference.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_model.py -------------------------------------------------------------------------------- /tests/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_quantization.py -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_tokenizer.py -------------------------------------------------------------------------------- /tests/test_trainable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_trainable.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tutorials/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tutorials/README.md -------------------------------------------------------------------------------- /tutorials/recipe_search.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tutorials/recipe_search.ipynb -------------------------------------------------------------------------------- /tutorials/semantic_chunking.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tutorials/semantic_chunking.ipynb -------------------------------------------------------------------------------- /tutorials/train_classifier.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/tutorials/train_classifier.ipynb -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MinishLab/model2vec/HEAD/uv.lock --------------------------------------------------------------------------------