├── .gitignore ├── CITATION.cff ├── LICENSE ├── Makefile ├── README.md ├── _typos.toml ├── changelog.md ├── conftest.py ├── dev_env.yml ├── docs ├── advanced_retriever.md ├── dense_retriever.md ├── faq.md ├── filters.md ├── hybrid_retriever.md ├── sparse_retriever.md ├── speed.md └── text_preprocessing.md ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── retriv ├── __init__.py ├── autotune │ ├── __init__.py │ ├── bm25_autotune.py │ └── merger_autotune.py ├── base_retriever.py ├── dense_retriever │ ├── __init__.py │ ├── ann_searcher.py │ ├── dense_retriever.py │ └── encoder.py ├── experimental │ ├── __init__.py │ └── advanced_retriever.py ├── hybrid_retriever.py ├── merger │ ├── __init__.py │ ├── merger.py │ └── normalization.py ├── paths.py ├── sparse_retriever │ ├── __init__.py │ ├── build_inverted_index.py │ ├── preprocessing │ │ ├── __init__.py │ │ ├── normalization.py │ │ ├── stemmer.py │ │ ├── stopwords.py │ │ ├── tokenizer.py │ │ └── utils.py │ ├── sparse_retrieval_models │ │ ├── __init__.py │ │ ├── bm25.py │ │ └── tf_idf.py │ └── sparse_retriever.py └── utils │ ├── __init__.py │ └── numba_utils.py ├── setup.py ├── test_env.yml └── tests ├── advanced_retriever └── advanced_retriever_test.py ├── dense_retriever └── encoder_test.py ├── merger ├── merger_test.py └── score_normalization_test.py ├── numba_utils_test.py └── sparse_retriever ├── preprocessing_test.py ├── search_engine_test.py ├── stemmer_test.py ├── stopwords_test.py ├── text_normalization_test.py └── tokenizer_test.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/.gitignore -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/README.md -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | [default] 2 | extend-ignore-identifiers-re = [ 3 | ".*Hsi", 4 | ] 5 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/changelog.md -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # NEEDED BY PYTETST 2 | -------------------------------------------------------------------------------- /dev_env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/dev_env.yml -------------------------------------------------------------------------------- /docs/advanced_retriever.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/advanced_retriever.md -------------------------------------------------------------------------------- /docs/dense_retriever.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/dense_retriever.md -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/faq.md -------------------------------------------------------------------------------- /docs/filters.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/filters.md -------------------------------------------------------------------------------- /docs/hybrid_retriever.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/hybrid_retriever.md -------------------------------------------------------------------------------- /docs/sparse_retriever.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/sparse_retriever.md -------------------------------------------------------------------------------- /docs/speed.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/speed.md -------------------------------------------------------------------------------- /docs/text_preprocessing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/docs/text_preprocessing.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/requirements.txt -------------------------------------------------------------------------------- /retriv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/__init__.py -------------------------------------------------------------------------------- /retriv/autotune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/autotune/__init__.py -------------------------------------------------------------------------------- /retriv/autotune/bm25_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/autotune/bm25_autotune.py -------------------------------------------------------------------------------- /retriv/autotune/merger_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/autotune/merger_autotune.py -------------------------------------------------------------------------------- /retriv/base_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/base_retriever.py -------------------------------------------------------------------------------- /retriv/dense_retriever/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /retriv/dense_retriever/ann_searcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/dense_retriever/ann_searcher.py -------------------------------------------------------------------------------- /retriv/dense_retriever/dense_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/dense_retriever/dense_retriever.py -------------------------------------------------------------------------------- /retriv/dense_retriever/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/dense_retriever/encoder.py -------------------------------------------------------------------------------- /retriv/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/experimental/__init__.py -------------------------------------------------------------------------------- /retriv/experimental/advanced_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/experimental/advanced_retriever.py -------------------------------------------------------------------------------- /retriv/hybrid_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/hybrid_retriever.py -------------------------------------------------------------------------------- /retriv/merger/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /retriv/merger/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/merger/merger.py -------------------------------------------------------------------------------- /retriv/merger/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/merger/normalization.py -------------------------------------------------------------------------------- /retriv/paths.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/paths.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /retriv/sparse_retriever/build_inverted_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/build_inverted_index.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/preprocessing/__init__.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/preprocessing/normalization.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/stemmer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/preprocessing/stemmer.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/stopwords.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/preprocessing/stopwords.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/preprocessing/tokenizer.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | def identity_function(x): 2 | return x 3 | -------------------------------------------------------------------------------- /retriv/sparse_retriever/sparse_retrieval_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /retriv/sparse_retriever/sparse_retrieval_models/bm25.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/sparse_retrieval_models/bm25.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/sparse_retrieval_models/tf_idf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/sparse_retrieval_models/tf_idf.py -------------------------------------------------------------------------------- /retriv/sparse_retriever/sparse_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/sparse_retriever/sparse_retriever.py -------------------------------------------------------------------------------- /retriv/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /retriv/utils/numba_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/retriv/utils/numba_utils.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/setup.py -------------------------------------------------------------------------------- /test_env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/test_env.yml -------------------------------------------------------------------------------- /tests/advanced_retriever/advanced_retriever_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/advanced_retriever/advanced_retriever_test.py -------------------------------------------------------------------------------- /tests/dense_retriever/encoder_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/dense_retriever/encoder_test.py -------------------------------------------------------------------------------- /tests/merger/merger_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/merger/merger_test.py -------------------------------------------------------------------------------- /tests/merger/score_normalization_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/merger/score_normalization_test.py -------------------------------------------------------------------------------- /tests/numba_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/numba_utils_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/preprocessing_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/preprocessing_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/search_engine_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/search_engine_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/stemmer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/stemmer_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/stopwords_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/stopwords_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/text_normalization_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/text_normalization_test.py -------------------------------------------------------------------------------- /tests/sparse_retriever/tokenizer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmenRa/retriv/HEAD/tests/sparse_retriever/tokenizer_test.py --------------------------------------------------------------------------------