├── .github └── workflows │ └── documentation.yml ├── .gitignore ├── FlagEmbedding ├── __init__.py ├── abc │ ├── __init__.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data_loader.py │ │ ├── evaluator.py │ │ ├── runner.py │ │ ├── searcher.py │ │ └── utils.py │ ├── finetune │ │ ├── __init__.py │ │ ├── embedder │ │ │ ├── AbsArguments.py │ │ │ ├── AbsDataset.py │ │ │ ├── AbsModeling.py │ │ │ ├── AbsRunner.py │ │ │ ├── AbsTrainer.py │ │ │ └── __init__.py │ │ └── reranker │ │ │ ├── AbsArguments.py │ │ │ ├── AbsDataset.py │ │ │ ├── AbsModeling.py │ │ │ ├── AbsRunner.py │ │ │ ├── AbsTrainer.py │ │ │ └── __init__.py │ └── inference │ │ ├── AbsEmbedder.py │ │ ├── AbsReranker.py │ │ └── __init__.py ├── evaluation │ ├── __init__.py │ ├── air_bench │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── arguments.py │ │ ├── examples │ │ │ ├── long-doc │ │ │ │ ├── arxiv-gemini.jsonl │ │ │ │ ├── arxiv-gpt3.jsonl │ │ │ │ ├── arxiv-llama2.jsonl │ │ │ │ ├── arxiv-llm-survey.jsonl │ │ │ │ ├── book-a-brief-history-of-time_stephen-hawking.jsonl │ │ │ │ ├── book-origin-of-species_darwin.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_1.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_2.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_3.jsonl │ │ │ │ ├── healthcare-pubmed_30k-40k_10-merged.jsonl │ │ │ │ ├── healthcare-pubmed_40k-50k_5-merged.jsonl │ │ │ │ ├── law-lex_files_300k-400k.jsonl │ │ │ │ ├── law-lex_files_400k-500k.jsonl │ │ │ │ ├── law-lex_files_500k-600k.jsonl │ │ │ │ └── law-lex_files_600k-700k.jsonl │ │ │ └── qa │ │ │ │ ├── arxiv.jsonl │ │ │ │ ├── finance.jsonl │ │ │ │ ├── healthcare.jsonl │ │ │ │ ├── law.jsonl │ │ │ │ ├── msmarco.jsonl │ │ │ │ ├── news.jsonl │ │ │ │ ├── web.jsonl │ │ │ │ └── wiki.jsonl │ │ └── runner.py │ ├── beir │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── arguments.py │ │ ├── data_loader.py │ │ ├── evaluator.py │ │ ├── prompts.py │ │ └── runner.py │ ├── custom │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── data_loader.py │ │ └── runner.py │ ├── miracl │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── data_loader.py │ │ └── runner.py │ ├── mkqa │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── data_loader.py │ │ ├── evaluator.py │ │ ├── runner.py │ │ └── utils │ │ │ ├── compute_metrics.py │ │ │ └── normalize_text.py │ ├── mldr │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── data_loader.py │ │ └── runner.py │ ├── msmarco │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── data_loader.py │ │ └── runner.py │ └── mteb │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── arguments.py │ │ ├── examples │ │ ├── AmazonCounterfactualClassification.csv │ │ ├── AmazonPolarityClassification.csv │ │ ├── AmazonReviewsClassification.csv │ │ ├── ArguAna.csv │ │ ├── ArxivClusteringP2P.csv │ │ ├── ArxivClusteringS2S.csv │ │ ├── AskUbuntuDupQuestions.csv │ │ ├── BIOSSES.csv │ │ ├── Banking77Classification.csv │ │ ├── BiorxivClusteringP2P.csv │ │ ├── BiorxivClusteringS2S.csv │ │ ├── CQADupstack.csv │ │ ├── CQADupstackRetrieval.csv │ │ ├── ClimateFEVER.csv │ │ ├── DBPedia.csv │ │ ├── EmotionClassification.csv │ │ ├── FEVER.csv │ │ ├── FiQA2018.csv │ │ ├── HotpotQA.csv │ │ ├── ImdbClassification.csv │ │ ├── MSMARCO.csv │ │ ├── MTOPDomainClassification.csv │ │ ├── MTOPIntentClassification.csv │ │ ├── MassiveIntentClassification.csv │ │ ├── MassiveScenarioClassification.csv │ │ ├── MedrxivClusteringP2P.csv │ │ ├── MedrxivClusteringS2S.csv │ │ ├── MindSmallReranking.csv │ │ ├── NFCorpus.csv │ │ ├── NQ.csv │ │ ├── QuoraRetrieval.csv │ │ ├── RedditClustering.csv │ │ ├── RedditClusteringP2P.csv │ │ ├── 
SCIDOCS.csv │ │ ├── SICK-R.csv │ │ ├── STS12.csv │ │ ├── STS13.csv │ │ ├── STS14.csv │ │ ├── STS15.csv │ │ ├── STS16.csv │ │ ├── STS17.csv │ │ ├── STS22.csv │ │ ├── STSBenchmark.csv │ │ ├── SciDocsRR.csv │ │ ├── SciFact.csv │ │ ├── SprintDuplicateQuestions.csv │ │ ├── StackExchangeClustering.csv │ │ ├── StackExchangeClusteringP2P.csv │ │ ├── StackOverflowDupQuestions.csv │ │ ├── SummEval.csv │ │ ├── TRECCOVID.csv │ │ ├── Touche2020.csv │ │ ├── ToxicConversationsClassification.csv │ │ ├── TweetSentimentExtractionClassification.csv │ │ ├── TwentyNewsgroupsClustering.csv │ │ ├── TwitterSemEval2015.csv │ │ └── TwitterURLCorpus.csv │ │ ├── prompts.py │ │ ├── runner.py │ │ └── searcher.py ├── finetune │ ├── __init__.py │ ├── embedder │ │ ├── __init__.py │ │ ├── decoder_only │ │ │ ├── __init__.py │ │ │ ├── base │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── arguments.py │ │ │ │ ├── load_model.py │ │ │ │ ├── modeling.py │ │ │ │ ├── runner.py │ │ │ │ └── trainer.py │ │ │ └── icl │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── arguments.py │ │ │ │ ├── dataset.py │ │ │ │ ├── load_model.py │ │ │ │ ├── modeling.py │ │ │ │ ├── runner.py │ │ │ │ └── trainer.py │ │ └── encoder_only │ │ │ ├── __init__.py │ │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── modeling.py │ │ │ ├── runner.py │ │ │ └── trainer.py │ │ │ └── m3 │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── arguments.py │ │ │ ├── modeling.py │ │ │ ├── runner.py │ │ │ └── trainer.py │ └── reranker │ │ ├── __init__.py │ │ ├── decoder_only │ │ ├── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── arguments.py │ │ │ ├── load_model.py │ │ │ ├── modeling.py │ │ │ ├── runner.py │ │ │ └── trainer.py │ │ └── layerwise │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── arguments.py │ │ │ ├── configuration_minicpm_reranker.py │ │ │ ├── load_model.py │ │ │ ├── modeling.py │ │ │ ├── modeling_minicpm_reranker.py │ │ │ ├── runner.py │ │ │ └── trainer.py │ │ └── encoder_only │ │ ├── __init__.py │ │ └── base │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── modeling.py │ │ ├── runner.py │ │ └── trainer.py └── inference │ ├── __init__.py │ ├── auto_embedder.py │ ├── auto_reranker.py │ ├── embedder │ ├── __init__.py │ ├── decoder_only │ │ ├── __init__.py │ │ ├── base.py │ │ └── icl.py │ ├── encoder_only │ │ ├── __init__.py │ │ ├── base.py │ │ └── m3.py │ └── model_mapping.py │ └── reranker │ ├── __init__.py │ ├── decoder_only │ ├── __init__.py │ ├── base.py │ ├── layerwise.py │ ├── lightweight.py │ └── models │ │ ├── __init__.py │ │ ├── configuration_minicpm_reranker.py │ │ ├── gemma_config.py │ │ ├── gemma_model.py │ │ └── modeling_minicpm_reranker.py │ ├── encoder_only │ ├── __init__.py │ └── base.py │ └── model_mapping.py ├── LICENSE ├── Manifest.in ├── README.md ├── README_zh.md ├── Tutorials ├── 1_Embedding │ ├── 1.1_Intro&Inference.ipynb │ ├── 1.2.1_BGE_Series.ipynb │ ├── 1.2.2_Auto_Embedder.ipynb │ ├── 1.2.3_BGE_v1&1.5.ipynb │ ├── 1.2.4_BGE-M3.ipynb │ ├── 1.2.5_BGE_EN_ICL.ipynb │ └── 1.2.6_BGE_VL.ipynb ├── 2_Metrics │ ├── 2.1_Similarity_Metrics.ipynb │ └── 2.2_Eval_Metrics.ipynb ├── 3_Indexing │ ├── 3.1.1_Intro_to_Faiss.ipynb │ ├── 3.1.2_Faiss_GPU.ipynb │ ├── 3.1.3_Faiss_Indexes.ipynb │ ├── 3.1.4_Faiss_Quantizers.ipynb │ └── 3.1.5_Faiss_Index_Choosing.ipynb ├── 4_Evaluation │ ├── 4.1.1_Evaluation_MSMARCO.ipynb │ ├── 4.2.1_MTEB_Intro.ipynb │ ├── 4.2.2_MTEB_Leaderboard.ipynb │ ├── 4.2.3_C-MTEB.ipynb │ ├── 4.3.1_Sentence_Transformers_Eval.ipynb │ ├── 4.4.1_BEIR.ipynb │ ├── 
4.5.1_MIRACL.ipynb │ ├── 4.5.2_MLDR.ipynb │ └── utils │ │ ├── compute_metrics.py │ │ └── normalize_text.py ├── 5_Reranking │ ├── 5.1_Intro.ipynb │ ├── 5.2_BGE_Reranker.ipynb │ └── 5.3_Reranker_Eval.ipynb ├── 6_RAG │ ├── 6.1_RAG_From_Scratch.ipynb │ ├── 6.2_RAG_LangChain.ipynb │ └── 6.3_RAG_LlamaIndex.ipynb ├── 7_Fine-tuning │ ├── 7.1.1_Data_preparation.ipynb │ ├── 7.1.2_Fine-tune.ipynb │ ├── 7.1.3_Eval_FT_Model.ipynb │ ├── 7.2.1_Hard_Negative_Mining.ipynb │ └── config │ │ ├── ds_stage0.json │ │ └── ds_stage1.json ├── README.md ├── quick_start.ipynb └── tutorial_map.png ├── dataset └── README.md ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── API │ ├── abc.rst │ ├── abc │ │ ├── evaluation.rst │ │ ├── evaluation │ │ │ ├── arguments.rst │ │ │ ├── data_loader.rst │ │ │ ├── evaluator.rst │ │ │ ├── runner.rst │ │ │ └── searcher.rst │ │ ├── finetune.rst │ │ ├── finetune │ │ │ ├── embedder.rst │ │ │ ├── embedder │ │ │ │ ├── AbsArguments.rst │ │ │ │ ├── AbsDataset.rst │ │ │ │ ├── AbsModeling.rst │ │ │ │ ├── AbsRunner.rst │ │ │ │ └── AbsTrainer.rst │ │ │ ├── reranker.rst │ │ │ └── reranker │ │ │ │ ├── AbsArguments.rst │ │ │ │ ├── AbsDataset.rst │ │ │ │ ├── AbsModeling.rst │ │ │ │ ├── AbsRunner.rst │ │ │ │ └── AbsTrainer.rst │ │ ├── inference.rst │ │ └── inference │ │ │ ├── AbsEmbedder.rst │ │ │ └── AbsReranker.rst │ ├── evaluation.rst │ ├── evaluation │ │ ├── airbench.rst │ │ ├── airbench │ │ │ ├── arguments.rst │ │ │ └── runner.rst │ │ ├── beir.rst │ │ ├── beir │ │ │ ├── arguments.rst │ │ │ ├── data_loader.rst │ │ │ ├── evaluator.rst │ │ │ └── runner.rst │ │ ├── miracl.rst │ │ ├── miracl │ │ │ ├── data_loader.rst │ │ │ └── runner.rst │ │ ├── mkqa.rst │ │ ├── mkqa │ │ │ ├── data_loader.rst │ │ │ ├── evaluator.rst │ │ │ └── runner.rst │ │ ├── mldr.rst │ │ ├── mldr │ │ │ ├── data_loader.rst │ │ │ └── runner.rst │ │ ├── msmarco.rst │ │ ├── msmarco │ │ │ ├── data_loader.rst │ │ │ └── runner.rst │ │ ├── mteb.rst │ │ └── mteb │ │ │ ├── arguments.rst │ │ │ ├── runner.rst │ │ │ └── searcher.rst │ ├── finetune.rst │ ├── finetune │ │ ├── embedder.rst │ │ ├── embedder │ │ │ ├── decoder_only.rst │ │ │ ├── decoder_only │ │ │ │ ├── base.rst │ │ │ │ ├── base │ │ │ │ │ ├── arguments.rst │ │ │ │ │ ├── modeling.rst │ │ │ │ │ ├── runner.rst │ │ │ │ │ └── trainer.rst │ │ │ │ ├── icl.rst │ │ │ │ └── icl │ │ │ │ │ ├── arguments.rst │ │ │ │ │ ├── dataset.rst │ │ │ │ │ ├── modeling.rst │ │ │ │ │ ├── runner.rst │ │ │ │ │ └── trainer.rst │ │ │ ├── encoder_only.rst │ │ │ └── encoder_only │ │ │ │ ├── base.rst │ │ │ │ ├── base │ │ │ │ ├── modeling.rst │ │ │ │ ├── runner.rst │ │ │ │ └── trainer.rst │ │ │ │ ├── m3.rst │ │ │ │ └── m3 │ │ │ │ ├── arguments.rst │ │ │ │ ├── modeling.rst │ │ │ │ ├── runner.rst │ │ │ │ └── trainer.rst │ │ ├── reranker.rst │ │ └── reranker │ │ │ ├── decoder_only.rst │ │ │ ├── decoder_only │ │ │ ├── base.rst │ │ │ ├── base │ │ │ │ ├── arguments.rst │ │ │ │ ├── modeling.rst │ │ │ │ ├── runner.rst │ │ │ │ └── trainer.rst │ │ │ ├── layerwise.rst │ │ │ └── layerwise │ │ │ │ ├── arguments.rst │ │ │ │ ├── modeling.rst │ │ │ │ ├── runner.rst │ │ │ │ └── trainer.rst │ │ │ ├── encoder_only.rst │ │ │ └── encoder_only │ │ │ ├── base.rst │ │ │ └── base │ │ │ ├── modeling.rst │ │ │ ├── runner.rst │ │ │ └── trainer.rst │ ├── index.rst │ ├── inference.rst │ └── inference │ │ ├── FlagAutoModel.rst │ │ ├── FlagAutoReranker.rst │ │ ├── embedder │ │ ├── decoder_only │ │ │ ├── BaseLLMEmbedder.rst │ │ │ └── ICLLLMEmbedder.rst │ │ ├── embedder.rst │ │ └── encoder_only │ │ │ ├── 
BaseEmbedder.rst │ │ │ └── M3Embedder.rst │ │ └── reranker │ │ ├── decoder_only │ │ ├── BaseLLMReranker.rst │ │ ├── LayerWiseLLMReranker.rst │ │ └── LightweightLLMReranker.rst │ │ ├── encoder_only │ │ └── BaseReranker.rst │ │ └── reranker.rst │ ├── C-MTEB.rst │ ├── FAQ │ └── index.rst │ ├── Introduction │ ├── IR.rst │ ├── embedder.rst │ ├── index.rst │ ├── installation.rst │ ├── overview.rst │ ├── quick_start.rst │ ├── reranker.rst │ ├── retrieval_demo.ipynb │ └── similarity.rst │ ├── _static │ ├── css │ │ └── custom.css │ └── img │ │ ├── BAAI_logo.png │ │ ├── BGE_WeChat_Group.png │ │ ├── C_MTEB.png │ │ ├── RAG_pipeline.png │ │ ├── bge_logo.jpeg │ │ ├── bge_panda.jpg │ │ ├── projects.png │ │ └── word2vec.png │ ├── bge │ ├── bge_icl.rst │ ├── bge_m3.rst │ ├── bge_reranker.rst │ ├── bge_reranker_v2.rst │ ├── bge_v1_v1.5.rst │ ├── bge_vl.rst │ └── index.rst │ ├── community │ └── index.rst │ ├── conf.py │ ├── index.rst │ └── tutorial │ ├── 1_Embedding.rst │ ├── 1_Embedding │ ├── 1.1.1.ipynb │ ├── 1.2.1.ipynb │ ├── 1.2.2.ipynb │ ├── 1.2.3.ipynb │ ├── 1.2.4.ipynb │ └── 1.2.5.ipynb │ ├── 2_Metrics.rst │ ├── 2_Metrics │ ├── 2.1.ipynb │ └── 2.2.ipynb │ ├── 3_Indexing.rst │ ├── 3_Indexing │ ├── 3.1.1.ipynb │ ├── 3.1.2.ipynb │ ├── 3.1.3.ipynb │ ├── 3.1.4.ipynb │ └── 3.1.5.ipynb │ ├── 4_Evaluation.rst │ ├── 4_Evaluation │ ├── 4.1.1.ipynb │ ├── 4.2.1.ipynb │ ├── 4.2.2.ipynb │ ├── 4.2.3.ipynb │ ├── 4.3.1.ipynb │ ├── 4.4.1.ipynb │ ├── 4.5.1.ipynb │ └── 4.5.2.ipynb │ ├── 5_Reranking.rst │ ├── 5_Reranking │ ├── 5.1.ipynb │ ├── 5.2.ipynb │ └── 5.3.ipynb │ ├── 6_RAG.rst │ ├── 6_RAG │ ├── 6.1.ipynb │ ├── 6.2.ipynb │ └── 6.3.ipynb │ ├── 7_Finetuning.rst │ ├── 7_Finetuning │ ├── 7.1.1.ipynb │ ├── 7.1.2.ipynb │ ├── 7.1.3.ipynb │ └── 7.2.1.ipynb │ └── index.rst ├── examples ├── README.md ├── evaluation │ ├── README.md │ ├── air_bench │ │ └── eval_air_bench.sh │ ├── beir │ │ └── eval_beir.sh │ ├── miracl │ │ └── eval_miracl.sh │ ├── mkqa │ │ └── eval_mkqa.sh │ ├── mldr │ │ └── eval_mldr.sh │ ├── msmarco │ │ └── eval_msmarco.sh │ └── mteb │ │ └── eval_mteb.sh ├── finetune │ ├── ds_stage0.json │ ├── ds_stage1.json │ ├── embedder │ │ ├── README.md │ │ ├── decoder_only │ │ │ ├── base.sh │ │ │ ├── base_same_dataset.sh │ │ │ └── icl_same_dataset.sh │ │ ├── encoder_only │ │ │ ├── base.sh │ │ │ ├── base_same_dataset.sh │ │ │ ├── m3.sh │ │ │ └── m3_same_dataset.sh │ │ └── example_data │ │ │ ├── classification-no_in_batch_neg │ │ │ ├── AmazonClassification.jsonl │ │ │ └── Banking77Classification.jsonl │ │ │ ├── clustering-no_in_batch_neg │ │ │ ├── arXiv_title.jsonl │ │ │ └── bioRXiv_title.jsonl │ │ │ ├── retrieval │ │ │ ├── msmarco.jsonl │ │ │ ├── nli.jsonl │ │ │ └── nq.jsonl │ │ │ └── sts │ │ │ └── sts.jsonl │ └── reranker │ │ ├── README.md │ │ ├── decoder_only │ │ ├── base.sh │ │ └── layerwise.sh │ │ ├── encoder_only │ │ └── base.sh │ │ └── example_data │ │ ├── normal │ │ └── examples.jsonl │ │ └── prompt_based │ │ └── examples.jsonl └── inference │ ├── embedder │ ├── README.md │ ├── decoder_only │ │ ├── auto_base_multi_devices.py │ │ ├── auto_base_single_device.py │ │ ├── auto_icl_multi_devices.py │ │ ├── auto_icl_single_device.py │ │ ├── base_multi_devices.py │ │ ├── base_single_device.py │ │ ├── icl_multi_devices.py │ │ └── icl_single_device.py │ └── encoder_only │ │ ├── auto_base_multi_devices.py │ │ ├── auto_base_single_device.py │ │ ├── auto_m3_multi_devices.py │ │ ├── auto_m3_single_device.py │ │ ├── base_multi_devices.py │ │ ├── base_single_device.py │ │ ├── m3_multi_devices.py │ │ ├── 
m3_multi_devices_compute_score.py │ │ ├── m3_single_device.py │ │ └── m3_single_device_compute_score.py │ └── reranker │ ├── README.md │ ├── decoder_only │ ├── auto_base_multi_devices.py │ ├── auto_base_single_device.py │ ├── auto_layerwise_multi_devices.py │ ├── auto_layerwise_single_device.py │ ├── auto_lightweight_multi_devices.py │ ├── auto_lightweight_single_device.py │ ├── base_multi_devices.py │ ├── base_single_device.py │ ├── layerwise_multi_devices.py │ ├── layerwise_single_device.py │ ├── lightweight_multi_devices.py │ └── lightweight_single_device.py │ └── encoder_only │ ├── auto_base_multi_devices.py │ ├── auto_base_single_device.py │ ├── base_multi_devices.py │ └── base_single_device.py ├── imgs ├── BGE_WeChat_Group.png ├── FlagOpen.png ├── bge_logo.jpg ├── cir_candi_1.png ├── cir_candi_2.png ├── cir_query.png └── projects.png ├── research ├── BGE_Coder │ ├── README.md │ ├── data_generation │ │ ├── constant.py │ │ ├── corpus_generator.py │ │ ├── format_generated_examples.py │ │ ├── llm.py │ │ ├── run_generation.py │ │ ├── search.py │ │ ├── triplet_generator.py │ │ └── utils.py │ ├── evaluation │ │ ├── coderag_eval │ │ │ ├── eval.sh │ │ │ ├── prepare_data.sh │ │ │ └── test │ │ │ │ ├── arguments.py │ │ │ │ ├── create │ │ │ │ ├── code_search_net.py │ │ │ │ ├── ds1000.py │ │ │ │ ├── general_programming.py │ │ │ │ ├── humaneval.py │ │ │ │ ├── live_code_bench.py │ │ │ │ ├── mbpp.py │ │ │ │ ├── odex.py │ │ │ │ ├── repoeval.py │ │ │ │ ├── repoeval_repo.py │ │ │ │ ├── swebench.py │ │ │ │ ├── swebench_repo.py │ │ │ │ └── utils.py │ │ │ │ ├── main.py │ │ │ │ └── prompts.py │ │ └── coir_eval │ │ │ ├── arguments.py │ │ │ ├── eval.sh │ │ │ ├── main.py │ │ │ └── prompts.py │ └── paper │ │ └── CodeR.pdf ├── BGE_M3 │ ├── BGE_M3.pdf │ ├── README.md │ ├── __init__.py │ ├── arguments.py │ ├── data.py │ ├── imgs │ │ ├── bm25.jpg │ │ ├── long.jpg │ │ ├── miracl.jpg │ │ ├── mkqa.jpg │ │ ├── nqa.jpg │ │ └── others.webp │ ├── modeling.py │ ├── run.py │ ├── split_data_by_length.py │ └── trainer.py ├── BGE_VL │ ├── LICENSE │ ├── README.md │ ├── assets │ │ ├── cir_candi_1.png │ │ ├── cir_candi_2.png │ │ ├── cir_query.png │ │ ├── corpus │ │ │ ├── 000000032077.jpg │ │ │ ├── 000000050549.jpg │ │ │ ├── 000000098911.jpg │ │ │ ├── 000000156031.jpg │ │ │ ├── 000000244097.jpg │ │ │ ├── 000000272130.jpg │ │ │ ├── 000000275230.jpg │ │ │ ├── 000000311907.jpg │ │ │ ├── 000000357304.jpg │ │ │ ├── 000000478916.jpg │ │ │ └── 000000545037.jpg │ │ ├── query │ │ │ └── 000000530944.jpg │ │ ├── res-ft-mmeb.png │ │ ├── res-scaling.png │ │ ├── res-zs-cir.png │ │ └── res-zs-mmeb.png │ ├── eval │ │ ├── data │ │ │ ├── circo_corpus.jsonl │ │ │ ├── circo_query.jsonl │ │ │ ├── fashioniq_dress_corpus.jsonl │ │ │ ├── fashioniq_dress_query_val.jsonl │ │ │ ├── fashioniq_shirt_corpus.jsonl │ │ │ ├── fashioniq_shirt_query_val.jsonl │ │ │ ├── fashioniq_toptee_corpus.jsonl │ │ │ └── fashioniq_toptee_query_val.jsonl │ │ ├── eval_Circo.py │ │ ├── eval_fashioniq.py │ │ ├── flag_dataset.py │ │ ├── flag_mmret.py │ │ └── results │ │ │ ├── mmret_base_circo.json │ │ │ └── mmret_large_circo.json │ ├── modeling_MMRet_CLIP.py │ └── retrieval_demo.ipynb ├── BGE_VL_Screenshot │ ├── README.md │ └── assets │ │ ├── neg_1.jpeg │ │ ├── neg_2.jpeg │ │ ├── positive_1.jpeg │ │ ├── positive_2.jpeg │ │ ├── query_1.png │ │ └── query_2.png ├── C_MTEB │ ├── C_MTEB │ │ ├── __init__.py │ │ └── tasks │ │ │ ├── Classification.py │ │ │ ├── Clustering.py │ │ │ ├── MultiLongDocRetrieval.py │ │ │ ├── PairClassification.py │ │ │ ├── Reranking.py │ │ │ ├── Retrieval.py │ │ │ 
├── STS.py │ │ │ └── __init__.py │ ├── MKQA │ │ ├── README.md │ │ ├── dense_retrieval │ │ │ ├── step0-generate_embedding.py │ │ │ ├── step1-search_results.py │ │ │ └── step2-eval_dense_mkqa.py │ │ ├── hybrid_retrieval │ │ │ ├── step0-hybrid_search_results.py │ │ │ └── step1-eval_hybrid_mkqa.py │ │ ├── multi_vector_rerank │ │ │ ├── hybrid_all_results.py │ │ │ ├── step0-rerank_results.py │ │ │ └── step1-eval_rerank_mkqa.py │ │ ├── sparse_retrieval │ │ │ ├── bm25_baseline.py │ │ │ ├── bm25_baseline_same_tokenizer.py │ │ │ ├── step0-encode_query-and-corpus.py │ │ │ ├── step1-search_results.py │ │ │ └── step2-eval_sparse_mkqa.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── evaluation.py │ │ │ └── normalize_text.py │ ├── MLDR │ │ ├── README.md │ │ ├── dense_retrieval │ │ │ ├── step0-generate_embedding.py │ │ │ ├── step1-search_results.py │ │ │ └── step2-eval_dense_mldr.py │ │ ├── hybrid_retrieval │ │ │ ├── step0-hybrid_search_results.py │ │ │ └── step1-eval_hybrid_mldr.py │ │ ├── mteb_dense_eval │ │ │ ├── eval_MLDR.py │ │ │ └── flag_dres_model.py │ │ ├── multi_vector_rerank │ │ │ ├── hybrid_all_results.py │ │ │ ├── step0-rerank_results.py │ │ │ └── step1-eval_rerank_mldr.py │ │ └── sparse_retrieval │ │ │ ├── bm25_baseline.py │ │ │ ├── bm25_baseline_same_tokenizer.py │ │ │ ├── step0-encode_query-and-corpus.py │ │ │ ├── step1-search_results.py │ │ │ └── step2-eval_sparse_mldr.py │ ├── README.md │ ├── eval_C-MTEB.py │ ├── eval_MTEB.py │ ├── eval_cross_encoder.py │ ├── flag_dres_model.py │ ├── setup.py │ └── summarize_results.py ├── LLARA │ ├── README.md │ ├── data │ │ ├── finetune │ │ │ └── toy_finetune_data.jsonl │ │ └── pretrain │ │ │ └── toy_pretrain_data.jsonl │ ├── finetune │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data.py │ │ ├── load_model.py │ │ ├── modeling.py │ │ ├── run.py │ │ └── trainer.py │ ├── pretrain │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data.py │ │ ├── load_model.py │ │ ├── modeling.py │ │ ├── run.py │ │ └── trainer.py │ └── stage1.json ├── LM_Cocktail │ ├── LM_Cocktail │ │ ├── __init__.py │ │ ├── cocktail.py │ │ └── utils.py │ ├── README.md │ ├── embedder_examples.json │ ├── images │ │ ├── 1.png │ │ └── pic.png │ ├── llm_examples.json │ └── setup.py ├── Long_LLM │ ├── activation_beacon │ │ ├── README.md │ │ ├── data │ │ │ ├── config │ │ │ │ ├── code.json │ │ │ │ ├── even.json │ │ │ │ ├── fsdp-offload.yaml │ │ │ │ ├── fsdp.yaml │ │ │ │ ├── slimpajama.json │ │ │ │ ├── zero3-infer-offload.yaml │ │ │ │ └── zero3-infer.yaml │ │ │ ├── deepspeed │ │ │ │ ├── stage2-offload.json │ │ │ │ ├── stage2.json │ │ │ │ ├── stage3-offload-optim.json │ │ │ │ ├── stage3-offload.json │ │ │ │ └── stage3.json │ │ │ └── toy │ │ │ │ └── infbench.json │ │ ├── examples │ │ │ ├── evaluation.md │ │ │ └── training.md │ │ ├── main │ │ │ ├── eval_generation.py │ │ │ ├── eval_infbench.py │ │ │ ├── eval_lm.py │ │ │ ├── eval_longbench.py │ │ │ ├── eval_mmlu.py │ │ │ ├── eval_msc.py │ │ │ ├── eval_multiturn.py │ │ │ ├── eval_needle.py │ │ │ ├── eval_passkey.py │ │ │ ├── eval_topic.py │ │ │ ├── infbench_utils.py │ │ │ ├── longbench_utils.py │ │ │ ├── pretrain_data.py │ │ │ ├── train.py │ │ │ └── vllm_symlink.py │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── args.py │ │ │ ├── chat.py │ │ │ ├── data.py │ │ │ ├── llama │ │ │ ├── __init__.py │ │ │ ├── configuration_llama.py │ │ │ └── modeling_llama.py │ │ │ ├── metrics.py │ │ │ ├── mistral │ │ │ ├── __init__.py │ │ │ ├── configuration_mistral.py │ │ │ └── modeling_mistral.py │ │ │ ├── modeling_beacon.py │ │ │ ├── modeling_utils.py │ │ │ ├── qwen2 │ │ │ 
├── __init__.py │ │ │ ├── configuration_qwen2.py │ │ │ └── modeling_qwen2.py │ │ │ ├── trainer.py │ │ │ ├── utils.py │ │ │ └── vllm_utils.py │ └── longllm_qlora │ │ ├── README.md │ │ ├── data │ │ └── narrativeqa.json │ │ ├── data_pipeline │ │ ├── README.md │ │ ├── _openai.py │ │ ├── data │ │ │ └── README.md │ │ ├── prepare_bio_book.ipynb │ │ ├── prepare_multi_details_book.ipynb │ │ ├── prepare_multi_details_paper_long.ipynb │ │ ├── prepare_one_detail_book.ipynb │ │ ├── prepare_one_detail_paper_long.ipynb │ │ └── raw_data │ │ │ └── README.md │ │ ├── imgs │ │ └── needle.png │ │ ├── main │ │ ├── eval_generation.py │ │ ├── eval_infbench.py │ │ ├── eval_lm.py │ │ ├── eval_longbench.py │ │ ├── eval_mmlu.py │ │ ├── eval_needle.py │ │ ├── eval_passkey.py │ │ ├── eval_topic.py │ │ ├── infbench_utils.py │ │ ├── longbench_utils.py │ │ └── train.py │ │ └── src │ │ ├── __init__.py │ │ ├── args.py │ │ ├── chat.py │ │ ├── data.py │ │ ├── metrics.py │ │ ├── modeling_utils.py │ │ ├── trainer.py │ │ └── utils.py ├── MLVU │ ├── README.md │ ├── data │ │ ├── 1_plotQA.json │ │ ├── 2_needle.json │ │ ├── 3_ego.json │ │ ├── 4_count.json │ │ ├── 5_order.json │ │ ├── 6_anomaly_reco.json │ │ ├── 7_topic_reasoning.json │ │ ├── 8_sub_scene.json │ │ └── 9_summary.json │ ├── evaluation │ │ ├── README.md │ │ ├── generation_evaluation │ │ │ ├── calculate.py │ │ │ ├── calculate_sum.py │ │ │ ├── evaluate_ssc.py │ │ │ ├── evaluate_summary.py │ │ │ └── open_bench.py │ │ ├── models │ │ │ ├── videochat2 │ │ │ │ ├── choice_bench.py │ │ │ │ └── open_bench.py │ │ │ └── videollava │ │ │ │ ├── choice_bench.py │ │ │ │ └── open_bench.py │ │ └── multiple_choice_evaluation │ │ │ └── choice_bench.py │ └── figs │ │ ├── statistic.png │ │ └── task_example.png ├── Matroyshka_reranker │ ├── README.md │ ├── finetune │ │ ├── compensation │ │ │ ├── __init__.py │ │ │ ├── arguments.py │ │ │ ├── data.py │ │ │ ├── load_model.py │ │ │ ├── mistral_config.py │ │ │ ├── mistral_model.py │ │ │ ├── modeling.py │ │ │ ├── run.py │ │ │ ├── stage1.json │ │ │ └── trainer.py │ │ └── self_distillation │ │ │ ├── __init__.py │ │ │ ├── arguments.py │ │ │ ├── data.py │ │ │ ├── load_model.py │ │ │ ├── mistral_config.py │ │ │ ├── mistral_model.py │ │ │ ├── modeling.py │ │ │ ├── run.py │ │ │ ├── stage1.json │ │ │ └── trainer.py │ ├── inference │ │ ├── __init__.py │ │ ├── mistral_config.py │ │ ├── mistral_model.py │ │ └── rank_model.py │ └── requirements.txt ├── README.md ├── Reinforced_IR │ ├── README.md │ ├── data_generation │ │ ├── agent │ │ │ ├── __init__.py │ │ │ ├── gpt.py │ │ │ ├── vllm.py │ │ │ └── vllm_instruct.py │ │ ├── generate_generator_data.py │ │ ├── generate_retriever_data.py │ │ ├── generate_retriever_distill_data.py │ │ ├── generate_universal_query.py │ │ ├── prompts │ │ │ ├── __init__.py │ │ │ ├── generate_prompts.py │ │ │ ├── get_prompts.py │ │ │ ├── hyde_prompts.py │ │ │ ├── teacher_prompts.py │ │ │ └── train_prompts.py │ │ └── utils.py │ ├── finetune │ │ ├── generator │ │ │ ├── save_tokenizer.py │ │ │ └── update_file.py │ │ ├── retriever │ │ │ ├── arguments.py │ │ │ ├── dataset.py │ │ │ ├── modeling.py │ │ │ ├── run.py │ │ │ ├── runner.py │ │ │ └── trainer.py │ │ └── stage1.json │ ├── inference │ │ ├── agent │ │ │ ├── __init__.py │ │ │ ├── gpt.py │ │ │ ├── vllm.py │ │ │ └── vllm_instruct.py │ │ ├── ir_model.py │ │ ├── multi.py │ │ └── test.py │ └── requirements.txt ├── baai_general_embedding │ ├── README.md │ ├── __init__.py │ ├── finetune │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data.py │ │ ├── eval_msmarco.py │ │ ├── hn_mine.py │ │ ├── 
modeling.py │ │ ├── run.py │ │ └── trainer.py │ └── retromae_pretrain │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data.py │ │ ├── enhancedDecoder.py │ │ ├── modeling.py │ │ ├── run.py │ │ ├── trainer.py │ │ └── utils.py ├── llm_dense_retriever │ ├── README.md │ ├── examples │ │ └── bge-en-icl │ │ │ ├── AIR-Bench │ │ │ ├── long-doc │ │ │ │ ├── arxiv-gemini.jsonl │ │ │ │ ├── arxiv-gpt3.jsonl │ │ │ │ ├── arxiv-llama2.jsonl │ │ │ │ ├── arxiv-llm-survey.jsonl │ │ │ │ ├── book-a-brief-history-of-time_stephen-hawking.jsonl │ │ │ │ ├── book-origin-of-species_darwin.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_1.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_2.jsonl │ │ │ │ ├── healthcare-pubmed_100k-200k_3.jsonl │ │ │ │ ├── healthcare-pubmed_30k-40k_10-merged.jsonl │ │ │ │ ├── healthcare-pubmed_40k-50k_5-merged.jsonl │ │ │ │ ├── law-lex_files_300k-400k.jsonl │ │ │ │ ├── law-lex_files_400k-500k.jsonl │ │ │ │ ├── law-lex_files_500k-600k.jsonl │ │ │ │ └── law-lex_files_600k-700k.jsonl │ │ │ └── qa │ │ │ │ ├── arxiv.jsonl │ │ │ │ ├── finance.jsonl │ │ │ │ ├── healthcare.jsonl │ │ │ │ ├── law.jsonl │ │ │ │ ├── msmarco.jsonl │ │ │ │ ├── news.jsonl │ │ │ │ ├── web.jsonl │ │ │ │ └── wiki.jsonl │ │ │ └── MTEB │ │ │ ├── AmazonCounterfactualClassification.json │ │ │ ├── AmazonPolarityClassification.json │ │ │ ├── AmazonReviewsClassification.json │ │ │ ├── ArguAna.json │ │ │ ├── ArxivClusteringP2P.json │ │ │ ├── ArxivClusteringS2S.json │ │ │ ├── AskUbuntuDupQuestions.json │ │ │ ├── BIOSSES.json │ │ │ ├── Banking77Classification.json │ │ │ ├── BiorxivClusteringP2P.json │ │ │ ├── BiorxivClusteringS2S.json │ │ │ ├── CQADupstackRetrieval.json │ │ │ ├── ClimateFEVER.json │ │ │ ├── DBPedia.json │ │ │ ├── EmotionClassification.json │ │ │ ├── FEVER.json │ │ │ ├── FiQA2018.json │ │ │ ├── HotpotQA.json │ │ │ ├── ImdbClassification.json │ │ │ ├── MSMARCO.json │ │ │ ├── MTOPDomainClassification.json │ │ │ ├── MTOPIntentClassification.json │ │ │ ├── MassiveIntentClassification.json │ │ │ ├── MassiveScenarioClassification.json │ │ │ ├── MedrxivClusteringP2P.json │ │ │ ├── MedrxivClusteringS2S.json │ │ │ ├── MindSmallReranking.json │ │ │ ├── NFCorpus.json │ │ │ ├── NQ.json │ │ │ ├── QuoraRetrieval.json │ │ │ ├── RedditClustering.json │ │ │ ├── RedditClusteringP2P.json │ │ │ ├── SCIDOCS.json │ │ │ ├── SICK-R.json │ │ │ ├── STS12.json │ │ │ ├── STS13.json │ │ │ ├── STS14.json │ │ │ ├── STS15.json │ │ │ ├── STS16.json │ │ │ ├── STS17.json │ │ │ ├── STS22.json │ │ │ ├── STSBenchmark.json │ │ │ ├── SciDocsRR.json │ │ │ ├── SciFact.json │ │ │ ├── SprintDuplicateQuestions.json │ │ │ ├── StackExchangeClustering.json │ │ │ ├── StackExchangeClusteringP2P.json │ │ │ ├── StackOverflowDupQuestions.json │ │ │ ├── SummEval.json │ │ │ ├── TRECCOVID.json │ │ │ ├── Touche2020.json │ │ │ ├── ToxicConversationsClassification.json │ │ │ ├── TweetSentimentExtractionClassification.json │ │ │ ├── TwentyNewsgroupsClustering.json │ │ │ ├── TwitterSemEval2015.json │ │ │ └── TwitterURLCorpus.json │ └── finetune │ │ ├── arguments.py │ │ ├── data.py │ │ ├── load_model.py │ │ ├── modeling.py │ │ ├── run.py │ │ └── trainer.py ├── llm_embedder │ ├── README.md │ ├── data │ │ ├── deepspeed │ │ │ ├── stage0.json │ │ │ ├── stage2-offload.json │ │ │ ├── stage2.json │ │ │ ├── stage3-offload-all.json │ │ │ ├── stage3-offload-optim.json │ │ │ └── stage3.json │ │ └── toy │ │ │ ├── chat.json │ │ │ ├── convsearch.json │ │ │ ├── icl.json │ │ │ ├── lrlm.json │ │ │ ├── qa.json │ │ │ └── tool.json │ ├── docs │ │ ├── evaluation.md │ │ └── fine-tune.md │ ├── 
environment.yaml │ ├── evaluation │ │ ├── __init__.py │ │ ├── eval_icl.py │ │ ├── eval_lrlm.py │ │ ├── eval_mmlu.py │ │ ├── eval_msc.py │ │ ├── eval_popqa.py │ │ ├── eval_qa.py │ │ ├── eval_qrecc.py │ │ ├── eval_retrieval.py │ │ ├── eval_tool.py │ │ └── icl_utils.py │ ├── imgs │ │ └── llm-embedder.png │ ├── run_dense.py │ ├── run_lm_score.py │ ├── run_ranker.py │ ├── scripts │ │ ├── llm-embedder.sh │ │ └── ours2st.py │ └── src │ │ ├── __init__.py │ │ ├── lm │ │ ├── __init__.py │ │ ├── args.py │ │ ├── modeling_lm.py │ │ └── modeling_srlm.py │ │ ├── retrieval │ │ ├── __init__.py │ │ ├── args.py │ │ ├── data.py │ │ ├── evalnq.py │ │ ├── metrics.py │ │ ├── modeling_bm25.py │ │ ├── modeling_dense.py │ │ ├── modeling_ranker.py │ │ ├── modeling_unified.py │ │ └── trainer.py │ │ └── utils │ │ ├── __init__.py │ │ ├── llama_patch.py │ │ └── util.py ├── llm_reranker │ ├── README.md │ ├── __init__.py │ ├── evaluate.py │ ├── evaluation │ │ ├── BEIR-bge-en-v1.5.png │ │ ├── BEIR-e5-mistral.png │ │ ├── CMTEB-retrieval-bge-zh-v1.5.png │ │ ├── llama-index.png │ │ └── miracl-bge-m3.png │ ├── finetune_for_instruction │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── data.py │ │ ├── load_model.py │ │ ├── modeling.py │ │ ├── run.py │ │ └── trainer.py │ ├── finetune_for_layerwise │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── configuration_minicpm_reranker.py │ │ ├── data.py │ │ ├── load_model.py │ │ ├── modeling.py │ │ ├── modeling_minicpm_reranker.py │ │ ├── run.py │ │ └── trainer.py │ ├── merge │ │ ├── __init__.py │ │ ├── configuration_minicpm_reranker.py │ │ ├── merge_base_model.py │ │ ├── merge_layerwise_model_from_finetuned_model.py │ │ ├── merge_layerwise_model_from_raw_model.py │ │ └── modeling_minicpm_reranker.py │ ├── stage1.json │ └── toy_finetune_data.jsonl ├── old-examples │ ├── finetune │ │ ├── README.md │ │ ├── ds_config.json │ │ ├── toy_evaluation_data │ │ │ ├── toy_corpus.json │ │ │ └── toy_query.json │ │ └── toy_finetune_data.jsonl │ ├── pretrain │ │ ├── README.md │ │ ├── retromae_pretrain │ │ │ ├── __init__.py │ │ │ ├── arguments.py │ │ │ ├── data.py │ │ │ ├── enhancedDecoder.py │ │ │ ├── modeling.py │ │ │ ├── run.py │ │ │ ├── trainer.py │ │ │ └── utils.py │ │ └── toy_pretrain_data.jsonl │ ├── reranker │ │ ├── README.md │ │ ├── ds_config.json │ │ └── toy_finetune_data.jsonl │ ├── search_demo │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── pre_process.py │ │ ├── readme.md │ │ ├── requirements.txt │ │ ├── run.py │ │ └── tool.py │ └── unified_finetune │ │ ├── README.md │ │ ├── toy_train_data │ │ ├── toy_train_data1.jsonl │ │ └── toy_train_data2.jsonl │ │ └── unified_finetune_bge-m3_exmaple.sh ├── reranker │ ├── README.md │ ├── __init__.py │ ├── arguments.py │ ├── data.py │ ├── modeling.py │ ├── run.py │ └── trainer.py └── visual_bge │ ├── README.md │ ├── __init__.py │ ├── imgs │ ├── SFT-CIRR.png │ ├── SFT-ReMuQ.png │ ├── SFT-WebQA.png │ ├── cir_candi_1.png │ ├── cir_candi_2.png │ ├── cir_query.png │ ├── wiki_candi_1.jpg │ ├── wiki_candi_2.jpg │ ├── zs-benchmark.png │ └── zs-performance.png │ ├── setup.py │ └── visual_bge │ ├── eva_clip │ ├── __init__.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── constants.py │ ├── eva_vit_model.py │ ├── factory.py │ ├── hf_configs.py │ ├── hf_model.py │ ├── loss.py │ ├── model.py │ ├── model_configs │ │ ├── EVA01-CLIP-B-16.json │ │ ├── EVA01-CLIP-g-14-plus.json │ │ ├── EVA01-CLIP-g-14.json │ │ ├── EVA02-CLIP-B-16.json │ │ ├── EVA02-CLIP-L-14-336.json │ │ ├── EVA02-CLIP-L-14.json │ │ ├── EVA02-CLIP-bigE-14-plus.json │ │ └── EVA02-CLIP-bigE-14.json │ ├── 
modified_resnet.py │ ├── openai.py │ ├── pretrained.py │ ├── rope.py │ ├── timm_model.py │ ├── tokenizer.py │ ├── transform.py │ ├── transformer.py │ └── utils.py │ └── modeling.py ├── scripts ├── README.md ├── add_reranker_score.py ├── hn_mine.py └── split_data_by_length.py └── setup.py /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | permissions: 6 | contents: write 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-python@v5 14 | - name: Install doc dependencies 15 | run: | 16 | pip install . sphinx myst_parser myst-nb sphinx-design pydata-sphinx-theme sphinxcontrib-googleanalytics 17 | - name: Install content dependencies 18 | run: | 19 | pip install faiss-cpu mteb air-benchmark beir 20 | - name: Sphinx build 21 | run: | 22 | sphinx-build docs/source docs/build 23 | - name: Add CNAME 24 | run: | 25 | echo bge-model.com > docs/build/CNAME 26 | - name: Deploy to GitHub Pages 27 | uses: peaceiris/actions-gh-pages@v3 28 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} 29 | with: 30 | publish_branch: gh-pages 31 | github_token: ${{ secrets.GITHUB_TOKEN }} 32 | publish_dir: docs/build/ 33 | force_orphan: true 34 | -------------------------------------------------------------------------------- /FlagEmbedding/__init__.py: -------------------------------------------------------------------------------- 1 | from .abc.inference import * 2 | from .inference import * 3 | -------------------------------------------------------------------------------- /FlagEmbedding/abc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/abc/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/abc/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .arguments import AbsEvalArgs, AbsEvalModelArgs 2 | from .evaluator import AbsEvaluator 3 | from .data_loader import AbsEvalDataLoader 4 | from .searcher import EvalRetriever, EvalDenseRetriever, EvalReranker 5 | from .runner import AbsEvalRunner 6 | 7 | 8 | __all__ = [ 9 | "AbsEvalArgs", 10 | "AbsEvalModelArgs", 11 | "AbsEvaluator", 12 | "AbsEvalDataLoader", 13 | "EvalRetriever", 14 | "EvalDenseRetriever", 15 | "EvalReranker", 16 | "AbsEvalRunner", 17 | ] 18 | -------------------------------------------------------------------------------- /FlagEmbedding/abc/finetune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/abc/finetune/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/abc/finetune/embedder/__init__.py: -------------------------------------------------------------------------------- 1 | from .AbsArguments import ( 2 | AbsEmbedderDataArguments, 3 | AbsEmbedderModelArguments, 4 | AbsEmbedderTrainingArguments, 5 | ) 6 | from .AbsDataset import ( 7 | AbsEmbedderCollator, AbsEmbedderSameDatasetCollator, 8 | AbsEmbedderSameDatasetTrainDataset, 9 | AbsEmbedderTrainDataset, 10 | EmbedderTrainerCallbackForDataRefresh, 11 | ) 12 | from 
.AbsModeling import AbsEmbedderModel, EmbedderOutput 13 | from .AbsTrainer import AbsEmbedderTrainer 14 | from .AbsRunner import AbsEmbedderRunner 15 | 16 | 17 | __all__ = [ 18 | "AbsEmbedderModelArguments", 19 | "AbsEmbedderDataArguments", 20 | "AbsEmbedderTrainingArguments", 21 | "AbsEmbedderModel", 22 | "AbsEmbedderTrainer", 23 | "AbsEmbedderRunner", 24 | "AbsEmbedderTrainDataset", 25 | "AbsEmbedderCollator", 26 | "AbsEmbedderSameDatasetTrainDataset", 27 | "AbsEmbedderSameDatasetCollator", 28 | "EmbedderOutput", 29 | "EmbedderTrainerCallbackForDataRefresh", 30 | ] 31 | -------------------------------------------------------------------------------- /FlagEmbedding/abc/finetune/reranker/__init__.py: -------------------------------------------------------------------------------- 1 | from .AbsArguments import AbsRerankerDataArguments, AbsRerankerModelArguments, AbsRerankerTrainingArguments 2 | from .AbsDataset import ( 3 | AbsRerankerTrainDataset, AbsRerankerCollator, 4 | AbsLLMRerankerTrainDataset, AbsLLMRerankerCollator 5 | ) 6 | from .AbsModeling import AbsRerankerModel, RerankerOutput 7 | from .AbsTrainer import AbsRerankerTrainer 8 | from .AbsRunner import AbsRerankerRunner 9 | 10 | __all__ = [ 11 | "AbsRerankerDataArguments", 12 | "AbsRerankerModelArguments", 13 | "AbsRerankerTrainingArguments", 14 | "AbsRerankerTrainDataset", 15 | "AbsRerankerCollator", 16 | "AbsLLMRerankerTrainDataset", 17 | "AbsLLMRerankerCollator", 18 | "AbsRerankerModel", 19 | "RerankerOutput", 20 | "AbsRerankerTrainer", 21 | "AbsRerankerRunner", 22 | ] 23 | -------------------------------------------------------------------------------- /FlagEmbedding/abc/inference/__init__.py: -------------------------------------------------------------------------------- 1 | from .AbsEmbedder import AbsEmbedder 2 | from .AbsReranker import AbsReranker 3 | 4 | __all__ = [ 5 | 'AbsEmbedder', 6 | 'AbsReranker' 7 | ] 8 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/evaluation/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/air_bench/__init__.py: -------------------------------------------------------------------------------- 1 | from .arguments import AIRBenchEvalModelArgs, AIRBenchEvalArgs 2 | from .runner import AIRBenchEvalRunner 3 | 4 | __all__ = [ 5 | "AIRBenchEvalModelArgs", 6 | "AIRBenchEvalArgs", 7 | "AIRBenchEvalRunner" 8 | ] 9 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/air_bench/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.air_bench import ( 4 | AIRBenchEvalArgs, AIRBenchEvalModelArgs, 5 | AIRBenchEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | AIRBenchEvalArgs, 12 | AIRBenchEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: AIRBenchEvalArgs 17 | model_args: AIRBenchEvalModelArgs 18 | 19 | runner = AIRBenchEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | 
print("==============================================") 30 | print("Search results have been generated.") 31 | print("For computing metrics, please refer to the official AIR-Bench docs:") 32 | print("- https://github.com/AIR-Bench/AIR-Bench/blob/main/docs/submit_to_leaderboard.md") 33 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/beir/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalModelArgs as BEIREvalModelArgs, 3 | ) 4 | 5 | from .data_loader import BEIREvalDataLoader 6 | from .arguments import BEIREvalArgs 7 | from .runner import BEIREvalRunner 8 | 9 | __all__ = [ 10 | "BEIREvalArgs", 11 | "BEIREvalModelArgs", 12 | "BEIREvalRunner", 13 | "BEIREvalDataLoader", 14 | ] 15 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/beir/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.beir import ( 4 | BEIREvalArgs, BEIREvalModelArgs, 5 | BEIREvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | BEIREvalArgs, 12 | BEIREvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: BEIREvalArgs 17 | model_args: BEIREvalModelArgs 18 | 19 | runner = BEIREvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/beir/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from FlagEmbedding.abc.evaluation.arguments import AbsEvalArgs 4 | 5 | 6 | @dataclass 7 | class BEIREvalArgs(AbsEvalArgs): 8 | """ 9 | Argument class for BEIR evaluation. 10 | """ 11 | use_special_instructions: bool = field( 12 | default=False, metadata={"help": "Whether to use specific instructions in `prompts.py` for evaluation. 
Default: False"} 13 | ) 14 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/custom/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalArgs as CustomEvalArgs, 3 | AbsEvalModelArgs as CustomEvalModelArgs, 4 | ) 5 | 6 | from .data_loader import CustomEvalDataLoader 7 | from .runner import CustomEvalRunner 8 | 9 | __all__ = [ 10 | "CustomEvalArgs", 11 | "CustomEvalModelArgs", 12 | "CustomEvalRunner", 13 | "CustomEvalDataLoader", 14 | ] 15 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/custom/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.custom import ( 4 | CustomEvalArgs, CustomEvalModelArgs, 5 | CustomEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | CustomEvalArgs, 12 | CustomEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: CustomEvalArgs 17 | model_args: CustomEvalModelArgs 18 | 19 | runner = CustomEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/custom/data_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from tqdm import tqdm 3 | from typing import List, Optional 4 | 5 | from FlagEmbedding.abc.evaluation import AbsEvalDataLoader 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class CustomEvalDataLoader(AbsEvalDataLoader): 11 | def available_dataset_names(self) -> List[str]: 12 | return [] 13 | 14 | def available_splits(self, dataset_name: Optional[str] = None) -> List[str]: 15 | return ["test"] 16 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/custom/runner.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import AbsEvalRunner 2 | 3 | from .data_loader import CustomEvalDataLoader 4 | 5 | 6 | class CustomEvalRunner(AbsEvalRunner): 7 | def load_data_loader(self) -> CustomEvalDataLoader: 8 | data_loader = CustomEvalDataLoader( 9 | eval_name=self.eval_args.eval_name, 10 | dataset_dir=self.eval_args.dataset_dir, 11 | cache_dir=self.eval_args.cache_path, 12 | token=self.eval_args.token, 13 | force_redownload=self.eval_args.force_redownload, 14 | ) 15 | return data_loader 16 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/miracl/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalArgs as MIRACLEvalArgs, 3 | AbsEvalModelArgs as MIRACLEvalModelArgs, 4 | ) 5 | 6 | from .data_loader import MIRACLEvalDataLoader 7 | from .runner import MIRACLEvalRunner 8 | 9 | __all__ = [ 10 | "MIRACLEvalArgs", 11 | "MIRACLEvalModelArgs", 12 | "MIRACLEvalRunner", 13 | "MIRACLEvalDataLoader", 14 | ] 15 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/miracl/__main__.py: -------------------------------------------------------------------------------- 1 
| from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.miracl import ( 4 | MIRACLEvalArgs, MIRACLEvalModelArgs, 5 | MIRACLEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | MIRACLEvalArgs, 12 | MIRACLEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: MIRACLEvalArgs 17 | model_args: MIRACLEvalModelArgs 18 | 19 | runner = MIRACLEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/miracl/runner.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import AbsEvalRunner 2 | 3 | from .data_loader import MIRACLEvalDataLoader 4 | 5 | 6 | class MIRACLEvalRunner(AbsEvalRunner): 7 | """ 8 | Evaluation runner of MIRACL. 9 | """ 10 | def load_data_loader(self) -> MIRACLEvalDataLoader: 11 | """Load the data loader instance by args. 12 | 13 | Returns: 14 | MIRACLEvalDataLoader: The MIRACL data loader instance. 15 | """ 16 | data_loader = MIRACLEvalDataLoader( 17 | eval_name=self.eval_args.eval_name, 18 | dataset_dir=self.eval_args.dataset_dir, 19 | cache_dir=self.eval_args.cache_path, 20 | token=self.eval_args.token, 21 | force_redownload=self.eval_args.force_redownload, 22 | ) 23 | return data_loader 24 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mkqa/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalArgs as MKQAEvalArgs, 3 | AbsEvalModelArgs as MKQAEvalModelArgs, 4 | ) 5 | 6 | from .data_loader import MKQAEvalDataLoader 7 | from .evaluator import MKQAEvaluator 8 | from .runner import MKQAEvalRunner 9 | 10 | __all__ = [ 11 | "MKQAEvalArgs", 12 | "MKQAEvalModelArgs", 13 | "MKQAEvalRunner", 14 | "MKQAEvalDataLoader", 15 | "MKQAEvaluator" 16 | ] 17 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mkqa/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.mkqa import ( 4 | MKQAEvalArgs, MKQAEvalModelArgs, 5 | MKQAEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | MKQAEvalArgs, 12 | MKQAEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: MKQAEvalArgs 17 | model_args: MKQAEvalModelArgs 18 | 19 | runner = MKQAEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mldr/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalArgs as MLDREvalArgs, 3 | AbsEvalModelArgs as MLDREvalModelArgs, 4 | ) 5 | 6 | from .data_loader import MLDREvalDataLoader 7 | from .runner import MLDREvalRunner 8 | 9 | __all__ = [ 10 | "MLDREvalArgs", 11 | "MLDREvalModelArgs", 12 | "MLDREvalRunner", 13 | "MLDREvalDataLoader", 14 | ] 15 | -------------------------------------------------------------------------------- 
/FlagEmbedding/evaluation/mldr/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.mldr import ( 4 | MLDREvalArgs, MLDREvalModelArgs, 5 | MLDREvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | MLDREvalArgs, 12 | MLDREvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: MLDREvalArgs 17 | model_args: MLDREvalModelArgs 18 | 19 | runner = MLDREvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mldr/runner.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import AbsEvalRunner 2 | 3 | from .data_loader import MLDREvalDataLoader 4 | 5 | 6 | class MLDREvalRunner(AbsEvalRunner): 7 | """ 8 | Evaluation runner of MLDR. 9 | """ 10 | def load_data_loader(self) -> MLDREvalDataLoader: 11 | """Load the data loader instance by args. 12 | 13 | Returns: 14 | MLDREvalDataLoader: The MLDR data loader instance. 15 | """ 16 | data_loader = MLDREvalDataLoader( 17 | eval_name=self.eval_args.eval_name, 18 | dataset_dir=self.eval_args.dataset_dir, 19 | cache_dir=self.eval_args.cache_path, 20 | token=self.eval_args.token, 21 | force_redownload=self.eval_args.force_redownload, 22 | ) 23 | return data_loader 24 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/msmarco/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalArgs as MSMARCOEvalArgs, 3 | AbsEvalModelArgs as MSMARCOEvalModelArgs, 4 | ) 5 | 6 | from .data_loader import MSMARCOEvalDataLoader 7 | from .runner import MSMARCOEvalRunner 8 | 9 | __all__ = [ 10 | "MSMARCOEvalArgs", 11 | "MSMARCOEvalModelArgs", 12 | "MSMARCOEvalRunner", 13 | "MSMARCOEvalDataLoader", 14 | ] 15 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/msmarco/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.msmarco import ( 4 | MSMARCOEvalArgs, MSMARCOEvalModelArgs, 5 | MSMARCOEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | MSMARCOEvalArgs, 12 | MSMARCOEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: MSMARCOEvalArgs 17 | model_args: MSMARCOEvalModelArgs 18 | 19 | runner = MSMARCOEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/msmarco/runner.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import AbsEvalRunner 2 | 3 | from .data_loader import MSMARCOEvalDataLoader 4 | 5 | 6 | class MSMARCOEvalRunner(AbsEvalRunner): 7 | """ 8 | Evaluation runner of MSMARCO. 9 | """ 10 | def load_data_loader(self) -> MSMARCOEvalDataLoader: 11 | """Load the data loader instance by args. 
12 | 13 | Returns: 14 | MSMARCOEvalDataLoader: The MSMARCO data loader instance. 15 | """ 16 | data_loader = MSMARCOEvalDataLoader( 17 | eval_name=self.eval_args.eval_name, 18 | dataset_dir=self.eval_args.dataset_dir, 19 | cache_dir=self.eval_args.cache_path, 20 | token=self.eval_args.token, 21 | force_redownload=self.eval_args.force_redownload, 22 | ) 23 | return data_loader 24 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.evaluation import ( 2 | AbsEvalModelArgs as MTEBEvalModelArgs, 3 | ) 4 | 5 | from .arguments import MTEBEvalArgs 6 | from .runner import MTEBEvalRunner 7 | 8 | __all__ = [ 9 | "MTEBEvalArgs", 10 | "MTEBEvalModelArgs", 11 | "MTEBEvalRunner", 12 | ] 13 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.evaluation.mteb import ( 4 | MTEBEvalArgs, MTEBEvalModelArgs, 5 | MTEBEvalRunner 6 | ) 7 | 8 | 9 | def main(): 10 | parser = HfArgumentParser(( 11 | MTEBEvalArgs, 12 | MTEBEvalModelArgs 13 | )) 14 | 15 | eval_args, model_args = parser.parse_args_into_dataclasses() 16 | eval_args: MTEBEvalArgs 17 | model_args: MTEBEvalModelArgs 18 | 19 | runner = MTEBEvalRunner( 20 | eval_args=eval_args, 21 | model_args=model_args 22 | ) 23 | 24 | runner.run() 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List 3 | 4 | from FlagEmbedding.abc.evaluation.arguments import AbsEvalArgs 5 | 6 | 7 | @dataclass 8 | class MTEBEvalArgs(AbsEvalArgs): 9 | """ 10 | Argument class for MTEB evaluation. 11 | """ 12 | languages: List[str] = field( 13 | default=None, metadata={"help": "Languages to evaluate. Default: eng"} 14 | ) 15 | tasks: List[str] = field( 16 | default=None, metadata={"help": "Tasks to evaluate. Default: None"} 17 | ) 18 | task_types: List[str] = field( 19 | default=None, metadata={"help": "The task types to evaluate. Default: None"} 20 | ) 21 | use_special_instructions: bool = field( 22 | default=False, metadata={"help": "Whether to use specific instructions in `prompts.py` for evaluation. Default: False"} 23 | ) 24 | examples_path: str = field( 25 | default=None, metadata={"help": "Use specific examples in the path. 
Default: None"} 26 | ) -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/AmazonCounterfactualClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "I wish I could have used this head set but the day I received it it wouldn't even turn on and I really wanted this product to work I'm very disappointed.","counterfactual" 3 | "I would advise that instead of trying to follow these poor instructions, Google it.","not-counterfactual" 4 | "I wrote to Monster customer service before ordering and they told me it would be fine to use without a converter and it was absolutely true.","not-counterfactual" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/AmazonPolarityClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Hunting the Hard Way Thia was a gift for my Husband, who loved the book. It arrived on the date we were told it would.",positive 3 | "Poor DVD Has too many interviews with people at the Live THomas day in Penn. My kids were annoyed and hated this DVD.",negative 4 | "Ludicrous and silly I remember getting this book so faintly that that says alot about my opinion of it. Basically, while I will entertain lots of odd ideas and theories, this book was basically silly.",negative 5 | "Artistry I think that the Deodato concerts are very rich, as he used real strings and band musicians, as well as you can appreciate the John Tropea excelent renditions on guitar.",positive -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/AmazonReviewsClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "DO NOT ORDER THIS\n\nThis isn't what's described at all. Taking it out of the package lace was cut upon arrival, wig was cut to like 14 inch, not curly, and smelled like cigarettes. I obviously was sent what someone returned, disgusting.Not what I ordered at all, not pleased at all. I want my money back DO NOT ORDER","1 star" 3 | "And I can’t return it\n\nThis product seemed like good quality but it does not stay stuck to the soles at all. You walk a few steps and then you find the black shoe grip somewhere on the floor.","2 star" 4 | "Three Stars\n\nnew yearly subscription plan is horrible, but the product still works as it did in the past","3 star" 5 | "I like how it has lots of pockets to put stuff ...\n\nI like how it has lots of pockets to put stuff in. I would have liked to have a shorter securing strap so it would not slide around so much. Good product.","4 star" 6 | "Great\n\nIt is really good. That's my favorite. 
THANK YOU","5 star" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/ArxivClusteringS2S.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "A Survey on Graph Neural Networks: Algorithms and Applications",cs 3 | "Hamiltonian Dynamics and KAM Theory for Infinite-Dimensional Systems",math 4 | "Dark Matter Distribution in Dwarf Spheroidal Galaxies: Constraints from Stellar Kinematics",astro-ph 5 | "Decoherence and Quantum Error Correction in Topological Quantum Computers",quant-ph 6 | "Spin-Orbit Coupling Effects in Low-Dimensional Quantum Materials",cond-mat -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/AskUbuntuDupQuestions.csv: -------------------------------------------------------------------------------- 1 | query,positive 2 | angularjs infinite scroll in a container,AngularJS ng-infinite-scroll not working on a specific container/div 3 | Java: Efficiently converting an array of longs to an array of bytes,Most Compact way to Serialize an Array of Longs in Java 4 | PyVISA missing methods,NI VISA + pyVisa on Mac OS X (Snow Leopard) -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/BIOSSES.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "Recent studies have highlighted the crucial role of p53 in regulating cell cycle progression.","Recent research underscores p53's pivotal function in controlling cellular division." 3 | "Neuroscience has revealed intricate pathways linking dopamine to reward and motivation.","Recent neuroscientific findings have illuminated complex dopamine pathways associated with motivation and reward." 4 | "Stem cell research holds promise for treating a variety of degenerative diseases.","The potential of stem cell research in combating degenerative illnesses is widely recognized." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/Banking77Classification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "What is my money worth in other countries?",exchange_rate 3 | "What can I do if my card still hasn't arrived after 2 weeks?",card_arrival 4 | "Would I be able to open an account for my daughter?",age_limit 5 | "My address details have changed and I want to update them",edit_personal_details 6 | "If my cash withdrawal is still not showing, is something wrong?",pending_cash_withdrawal 7 | "How long do transfers typically take? Is there a way of speeding the process up? 
My friend needs the money I sent her desperately.",transfer_not_received_by_recipient -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/BiorxivClusteringS2S.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Neural Circuit Dynamics in Decision-Making: A Computational Model of Prefrontal-Striatal Interactions",neuroscience 3 | "Metagenomic Insights into Extreme Environments: Microbial Diversity and Functional Adaptations in Antarctic Lakes",microbiology 4 | "Machine Learning Approaches for Predicting Protein Structure and Function from Sequence Data",bioinformatics 5 | "Regulation of Stem Cell Fate Decisions by the Hippo Signaling Pathway: Implications for Tissue Regeneration and Cancer Therapy",cell biology 6 | "Optical Tweezers and Single-Molecule Force Spectroscopy: Probing Protein Folding Dynamics and Mechanical Properties of Biomolecules",biophysics -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/CQADupstack.csv: -------------------------------------------------------------------------------- 1 | query,positive 2 | angularjs infinite scroll in a container,AngularJS ng-infinite-scroll not working on a specific container/div 3 | Java: Efficiently converting an array of longs to an array of bytes,Most Compact way to Serialize an Array of Longs in Java 4 | PyVISA missing methods,NI VISA + pyVisa on Mac OS X (Snow Leopard) 5 | -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/CQADupstackRetrieval.csv: -------------------------------------------------------------------------------- 1 | query,pos -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/EmotionClassification.csv: -------------------------------------------------------------------------------- 1 | text,label_text 2 | "i am bothered is that he might changed his feelings once he get back in us and leave me heartbroken",sadness 3 | "i have always loved my jobs and loved to work and i truly feel like being back there with my patients and co workers will do me a lot of good even if it is only for a few weeks",joy 4 | "i certainly feel loved and appreciated and grateful for all that i have",love 5 | "im grabbing a minute to post i feel greedy wrong",anger 6 | "i was stymied a little bit as i wrote feeling unsure that i might go somewhere with the story unintended",fear 7 | "i keep feeling pleasantly surprised at his supportiveness and also his ease in new situations",surprise -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/HotpotQA.csv: -------------------------------------------------------------------------------- 1 | query,pos 2 | "Which tennis player Anna-Lena Grönefeld or Mats Wilander turned professional first ?","Anna-Lena Grönefeld Anna-Lena Grönefeld (born 4 June 1985) is a German tennis player. She turned professional in April 2003." 3 | "What South Korean K-pop group has 13 members and their own online TV program?","Seventeen (band) Seventeen (Hangul: 세븐틴 ), also stylized as SEVENTEEN or SVT, is a South Korean boy group formed by Pledis Entertainment in 2015. 
The group consists of thirteen members who are separated into three sub-units, each with different areas of specialization: a 'Hip-Hop Unit', 'Vocal Unit', and 'Performance Unit'. They have released one studio album and four extended plays." 4 | "The game show Keep It in the Family was hosted by an actor that played what role in "Coronation Street"?","Keep It in the Family (UK game show) Keep It in the Family is a British game show that aired on ITV from 26 October 2014 to 19 December 2015 and is hosted by Bradley Walsh." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/ImdbClassification.csv: -------------------------------------------------------------------------------- 1 | text,label_text 2 | "Renny Harlin's first American film was one of the best of a slew of prison-set horror films(like 'Death House' or 'The Chair')in the late 80's.Twenty years before,guard Lane Smith had wrongfully executed a condemned man.Now,he is the warden of the newly re-opened prison,and the man's ghost is back for bloody revenge.This atmospheric and very moody film features lots of gruesome gore and violence.Viggo Mortensen,Tiny Lister,Tom Everett and Kane Hodder are onhand for the entertaining carnage.","positive" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MSMARCO.csv: -------------------------------------------------------------------------------- 1 | query,pos 2 | "what is a pms color","PMS is a solid-color matching system, used primarily for specifying second or third colors in printing, meaning colors in addition to black, (although, obviously, one can certainly print a one-color piece using a PMS color and no black all)." 3 | "when was snowboarding invented","Snowboarding Modern snowboarding began in 1965 when Sherman Poppen, an engineer in Muskegon, Michigan, invented a toy for his daughters by fastening two skis together and attaching a rope to one end so he would have some control as they stood on the board and glided downhill." 4 | "difference between pollination fertilization","What is the difference between pollination & fertilization in flowering plants? • Pollination is a process flowering plants only undergo. It is the transfer of pollen to the plant’s stigma. The process can be done by the plant itself or through outside agents. • Fertilization is basically the joining of sperm and egg." 
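The example files above follow a few flat CSV schemas: retrieval- and reranking-style tasks use `query,pos` (or `query,positive`) columns, classification tasks use `text,label` (or `text,label_text`), and sentence-pair tasks use `sent1,sent2`. How the evaluation itself consumes these files is implemented in the runner imported by `FlagEmbedding/evaluation/mteb/__init__.py`; the snippet below is only a minimal sketch of reading one of the files with the standard library, assuming it is run from the repository root.

```python
# Minimal sketch (not part of the repository): inspect one of the example CSVs.
import csv

path = "FlagEmbedding/evaluation/mteb/examples/MSMARCO.csv"
with open(path, newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        # Each row pairs a query with one positive passage.
        print(row["query"], "->", row["pos"][:60], "...")
```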
-------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MTOPDomainClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "I am no longer available",calling 3 | "Cancel my reminder about my dentist appointment",reminder 4 | "Will it rain tomorrow?",weather 5 | "Create an appointment alarm for 11:30am.",allarm 6 | "Play a different playlist",music 7 | "What's the best way to fry chicken",recipes 8 | "what city does Ahmed live in ?",people -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MTOPIntentClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "When will my next alarm start",GET_ALARM 3 | "I need you to message Zachary Fletcher",SEND_MESSAGE 4 | "show me video messages from Atlas",GET_MESSAGE 5 | "I want to listen to AC/DC please",PLAY_MUSIC 6 | "Make an alarm for the next 7 weeks for Thursday at 6pm",CREATE_ALARM 7 | "fairs happening in ann arbor next week",GET_EVENT 8 | "Will we get a frost this week?",GET_WEATHER -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MassiveIntentClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "remind me to pay rent every month",calendar_set 3 | "please play yesterday from beatles",play_music 4 | "what will the temperatures be for the next week",weather_query 5 | "give me the detailed schedule for next week",calendar_query 6 | "what's happening in my day",general_quirky 7 | "dolores how was your day",general_quirky 8 | "who was appointed as deputy centimeter of uttar pradesh",qa_factoid 9 | "find me news about trumps speech",news_query -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MassiveScenarioClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "can you confirm that my meeting for tomorrow has been canceled",calendar 3 | "please open my music application and play games by disturbed",play 4 | "what's the word orange mean",qa 5 | "find me all mails from magda with holidays word in the title",email 6 | "get a cup of coffee ready now",iot 7 | "good morning olly",general -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MedrxivClusteringS2S.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Evaluating the Efficacy of New Therapeutic Agents in the Management of Hypertension-Induced Kidney Damage",nephrology 3 | "Exploring the Relationship Between ICU Staffing Levels and Patient Outcomes in Severe Trauma Cases",intensive care and critical care medicine 4 | "The Impact of Environmental Allergens on Pediatric Asthma and Ear Infections",otolaryngology 5 | "Patient-Reported Outcomes in Rehabilitation: The Importance of Psychosocial Factors in Recovery",rehabilitation medicine and physical therapy 6 | "The Role of Micronutrients in Supporting Immune Function During Viral Infections",nutrition -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/MindSmallReranking.csv: 
-------------------------------------------------------------------------------- 1 | query,pos 2 | "'Wheel Of Fortune' Guest Delivers Hilarious, Off The Rails Introduction","Charles Rogers, former Michigan State football, Detroit Lions star, dead at 38" 3 | "Eliud Kipchoge runs 1:59 marathon, first to break 2 hours","AP-NORC poll: Many youths say high school diploma is enough" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/QuoraRetrieval.csv: -------------------------------------------------------------------------------- 1 | query,pos 2 | "Why do people say Dhanush (South Indian actor) is ugly? I don't think so.?","Why do people say Dhanush (South Indian actor) is ugly? I don't think so?" 3 | "What are some hit and nice ideas about architecture dissertation topics?","What are some interesting undergraduate architecture thesis topics?" 4 | "Could someone please motivate me?","Can you motivate me?" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/RedditClustering.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Financial Meltdown: Strategies for Surviving Economic Collapse",collapse.txt 3 | "Exclusive Comic Book Sale: Don't Miss Out on January 13th!",comicbooks.txt 4 | "Tchaikovsky's Untold Story: The Mystery Behind Symphony No. 7",classicalmusic.txt 5 | "Coffee Addiction: When It's More Than Just a Drink",Coffee.txt -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/SICK-R.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "The cat is lounging on the sunny windowsill.","The feline is resting on the sunny windowsill." 3 | "A woman is reading a book while sitting on a bench.","A lady is reading a book while seated on a bench." 4 | "The child is drawing with crayons on a piece of paper.","The kid is using crayons to draw on a sheet of paper." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS12.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "A man is dancing on the ceiling.","A man is dancing on the ceiling of a room." 3 | "That is a shameful state of affairs when we consider that the EU itself is a champion of modernised business practice.","It is a shame when it is thought that the European Union is posed as a champion modernization of the economic life!" 4 | "Spain has done a magnificent job in turning round the difficult neighbourly relations which Europe and North Africa and Spain and Morocco have suffered during the course of history.","Spain has developed a remarkably positive the difficult neighbourhood which has always existed between Europe and North Africa and between Spain and Morocco." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS13.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "the state of being exposed to danger or harm","the condition of being at risk of injury or loss." 3 | "a set of instructions for a computer","directions given to a computer to perform a specific task." 4 | "a building used for public worship","a place where people gather to worship collectively." 
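The sentence-pair files above (BIOSSES, SICK-R, STS12, STS13) simply hold `sent1,sent2` columns. As a rough illustration of how an embedder scores such a pair, the sketch below encodes one pair from STS13.csv with the `FlagModel` class exported by `FlagEmbedding.inference` later in this dump; the checkpoint name and the exact `encode()` signature are assumptions rather than facts taken from these files.

```python
# Hedged sketch: cosine similarity of one sent1/sent2 pair from STS13.csv.
import numpy as np
from FlagEmbedding.inference import FlagModel

model = FlagModel("BAAI/bge-base-en-v1.5")  # assumed checkpoint name
sent1 = "a set of instructions for a computer"
sent2 = "directions given to a computer to perform a specific task."
emb1, emb2 = model.encode([sent1, sent2])  # assumed: one vector per input text
score = float(np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2)))
print(f"cosine similarity: {score:.4f}")
```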
-------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS14.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "president obama vows to work with congress on immigration reform .","obama pledges to collaborate with congress on immigration overhaul ." 3 | "britain votes to leave european union .","uk votes to leave eu ." 4 | "russian president putin signs law banning adoption of russian children by u.s. citizens .","putin bans u.s. adoptions of russian children ." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS15.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "The battery and bulb A are not in the same path","Bulb A and the battery are not in the same circuit." 3 | "Switch Y and bulb B are in the same loop","Switch Y and bulb B belong to the same circuit." 4 | "new york city marathon canceled due to hurricane sandy","nyc marathon canceled because of hurricane sandy" 5 | "pope francis calls for peace in syria during sunday address","pope francis appeals for peace in syria in his sunday speech" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS16.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "what are the symptoms of a heart attack ?","what are the signs of a heart attack ?" 3 | "how do i change a flat tire on my car ?","what steps should i take to replace a flat tire ?" 4 | "how do i cook a medium rare steak ?","what's the best way to prepare a steak to medium rare ?" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STS17.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "The sun is setting over the mountains.", "The sun sets behind the mountains." 3 | "A child is playing with a red ball.", "A kid plays with a red ball." 4 | "Two people are sitting on a bench in the park.", "Two individuals are seated on a bench in the park." -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/STSBenchmark.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "Agribusiness: Mad cow disease found in California","USDA Confirms Case of Mad Cow Disease in California" 3 | "santos stated colombian police found the evidence in 2 computers discovered with slain rebel leader raul reyes. ","francisco santos stated that colombian police found the evidence on two computers discovered with raul reyes." 
4 | "US Attorney General Holder resigns","US Attorney general Eric Holder to resign" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/SciDocsRR.csv: -------------------------------------------------------------------------------- 1 | query,pos 2 | "Intelligent Word-Based Spam Filter Detection Using Multi-Neural Networks","Efficient Harmful Email identification Using Neural Network" 3 | "Importance of sediments in understanding nutrient cyclings in lakes","Raphidiopsis mediterranea Skuja represents non-heterocytous life-cycle stages of Cylindrospermopsis raciborskii (Woloszynska) Seenayya et Subba Raju in Lake Kastoria (Greece), its type locality: Evidence by morphological and phylogenetic analysis" 4 | "Adult playfulness and its relationship to humour , subjective happiness and depression : A comparative study of Hong Kong and Mainland China","Rapid assessment of well-being: The Short Depression-Happiness Scale (SDHS)." 5 | "In depth performance evaluation of LTE-M for M2M communications","Simulating LTE Cellular Systems: An Open-Source Framework" 6 | "Marketing segmentation using support vector clustering","Support vector clustering" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/SprintDuplicateQuestions.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "Kyocera duraforce pro international roaming settings","Make a call while roaming internationally - Kyocera DuraForce PRO" 3 | "Guide for connecting to the Sprint U301 USB mobile broadband","Turn automatic connections on or off - Sprint U301 USB Device Sprint 3G/4G Mobile Broadband" 4 | "What do you think is a reason that is preventing troubleshooting on my HTC One A9 related to issues to the mobile hotspots ?","Troubleshoot issues related to mobile hotspots and your HTC One A9" 5 | "Why has my Samsung Transform been freezing everytime I attempt to open up an app ?","Why is my Samsung Transform freezing or being unresponsive ?" 
6 | "What can I do to turn on Wi-Fi on the HTC One A9 ?","Turn on and connect to Wi-Fi - HTC One A9" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/StackExchangeClustering.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Recommendations for a lightweight Markdown editor with real-time collaboration features?",softwarerecs.stackexchange.com.txt 3 | "How to integrate external APIs with EOSIO blockchain applications?",eosio.stackexchange.com.txt 4 | "How to balance macros for effective fat loss and muscle retention?",fitness.stackexchange.com.txt 5 | "Can "amans" be used as a substantival participle in Latin?",latin.stackexchange.com.txt -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/StackOverflowDupQuestions.csv: -------------------------------------------------------------------------------- 1 | query,pos 2 | "How to handle onChange event in React when state changes programmatically?","React onChange event not firing when state is updated programmatically" 3 | "How to simulate a click event on a button using JavaScript?","JavaScript button click event simulation" 4 | "Python: How to run a function asynchronously using asyncio?","Asyncio: Running Python function asynchronously" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/ToxicConversationsClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Pull your little head out of your big ass areola!","toxic" 3 | "Trudeau will survive this alright as, unfortunately, Canadians are amongst the most dumbed down on the earth thanks to no real media other than his lapdogs.","toxic" 4 | "No evidence of one ever starting a fire. Better ban them.","not toxic" 5 | "What's this about Andrew Jackson and the civil war? And now he is saying he would be "honored" to meet with NK dictator Kim Jung Un. Is the pressure of being president getting to him? I am worried.","not toxic" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/TweetSentimentExtractionClassification.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "I`d have responded, if I were going",neutral 3 | "what interview! leave me alone",negative 4 | "2am feedings for the baby are fun when he is all smiles and coos",positive 5 | "is cleaning the house for her family who is comming later today..",neutral 6 | "Sick. 
With a flu like thing.",negative 7 | "We saw that in none 3D - the baddie`s the best",positive -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/TwentyNewsgroupsClustering.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "Major flaw discovered in widely-used encryption protocol",sci.crypt 3 | "Bruins' Unstoppable Winning Streak",rec.sport.hockey 4 | "Troubleshooting a Digital Multimeter Calibration Issue",sci.electronics 5 | "Understanding DPI Scaling in X Window Systems",comp.windows.x -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/TwitterSemEval2015.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "Excited for the new Game of Thrones episode tonight!","Can't wait for tonight's Game of Thrones episode!" 3 | "Just finished a 5k run and feel amazing!","Completed a 5k run and I'm feeling great!" 4 | "Had an incredible dinner at Joe's Italian Restaurant.","Joe's Italian Restaurant served an amazing dinner tonight." 5 | "I need a vacation. Can't wait to hit the beach.","Desperately need a holiday. Looking forward to beach time." 6 | "The new iPhone has some fantastic features!","Loving the features on the new iPhone!" -------------------------------------------------------------------------------- /FlagEmbedding/evaluation/mteb/examples/TwitterURLCorpus.csv: -------------------------------------------------------------------------------- 1 | sent1,sent2 2 | "Elon Musk says Tesla will be profitable next quarter.","Elon Musk claims Tesla will turn a profit next quarter." 3 | "The new iPhone just got announced and it's amazing.","Apple just unveiled the new iPhone and it's incredible." 4 | "Beyoncé's new album has topped the charts in its first week.","Beyoncé's latest album debuted at number one on the charts." 5 | "Breaking: Major earthquake hits California.","Just in: Large earthquake strikes California." 6 | "NASA plans to send humans to Mars by 2030.","NASA aims to have astronauts on Mars by the year 2030." 
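The CSVs above form the `examples` directory that the `examples_path` field of `MTEBEvalArgs` points at, and the evaluation is normally launched through the `__main__.py` entry point shown earlier in this package. The sketch below drives the same runner programmatically; the `--embedder_name_or_path` flag is an assumption (the fields of `MTEBEvalModelArgs`/`AbsEvalModelArgs` are not reproduced in this dump), and additional required arguments may apply.

```python
# Hedged sketch: run the MTEB evaluation without the command-line entry point.
from transformers import HfArgumentParser
from FlagEmbedding.evaluation.mteb import MTEBEvalArgs, MTEBEvalModelArgs, MTEBEvalRunner

parser = HfArgumentParser((MTEBEvalArgs, MTEBEvalModelArgs))
eval_args, model_args = parser.parse_args_into_dataclasses(args=[
    "--tasks", "STS12", "Banking77Classification",
    "--languages", "eng",
    "--examples_path", "FlagEmbedding/evaluation/mteb/examples",
    "--embedder_name_or_path", "BAAI/bge-base-en-v1.5",  # assumed flag name
])

runner = MTEBEvalRunner(eval_args=eval_args, model_args=model_args)
runner.run()
```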
-------------------------------------------------------------------------------- /FlagEmbedding/finetune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/embedder/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/decoder_only/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/embedder/decoder_only/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/decoder_only/base/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.finetune.embedder import ( 2 | AbsEmbedderDataArguments as DecoderOnlyEmbedderDataArguments, 3 | AbsEmbedderTrainingArguments as DecoderOnlyEmbedderTrainingArguments, 4 | ) 5 | 6 | from .arguments import DecoderOnlyEmbedderModelArguments 7 | from .modeling import BiDecoderOnlyEmbedderModel 8 | from .trainer import DecoderOnlyEmbedderTrainer 9 | from .runner import DecoderOnlyEmbedderRunner 10 | 11 | __all__ = [ 12 | 'DecoderOnlyEmbedderDataArguments', 13 | 'DecoderOnlyEmbedderTrainingArguments', 14 | 'DecoderOnlyEmbedderModelArguments', 15 | 'BiDecoderOnlyEmbedderModel', 16 | 'DecoderOnlyEmbedderTrainer', 17 | 'DecoderOnlyEmbedderRunner', 18 | ] 19 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/decoder_only/base/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.finetune.embedder.decoder_only.base import ( 4 | DecoderOnlyEmbedderDataArguments, 5 | DecoderOnlyEmbedderTrainingArguments, 6 | DecoderOnlyEmbedderModelArguments, 7 | DecoderOnlyEmbedderRunner, 8 | ) 9 | 10 | 11 | def main(): 12 | parser = HfArgumentParser(( 13 | DecoderOnlyEmbedderModelArguments, 14 | DecoderOnlyEmbedderDataArguments, 15 | DecoderOnlyEmbedderTrainingArguments 16 | )) 17 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 18 | model_args: DecoderOnlyEmbedderModelArguments 19 | data_args: DecoderOnlyEmbedderDataArguments 20 | training_args: DecoderOnlyEmbedderTrainingArguments 21 | 22 | runner = DecoderOnlyEmbedderRunner( 23 | model_args=model_args, 24 | data_args=data_args, 25 | training_args=training_args 26 | ) 27 | runner.run() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/decoder_only/icl/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.finetune.embedder import ( 2 | AbsEmbedderTrainingArguments as DecoderOnlyEmbedderICLTrainingArguments, 3 | ) 4 | 5 | from .arguments import ( 6 | DecoderOnlyEmbedderICLModelArguments, 7 | 
DecoderOnlyEmbedderICLDataArguments 8 | ) 9 | from .dataset import ( 10 | DecoderOnlyEmbedderICLSameDatasetTrainDataset, 11 | AbsEmbedderSameDatasetCollator 12 | ) 13 | from .modeling import BiDecoderOnlyEmbedderICLModel 14 | from .trainer import DecoderOnlyEmbedderICLTrainer 15 | from .runner import DecoderOnlyEmbedderICLRunner 16 | 17 | __all__ = [ 18 | 'DecoderOnlyEmbedderICLModelArguments', 19 | 'DecoderOnlyEmbedderICLDataArguments', 20 | 'DecoderOnlyEmbedderICLTrainingArguments', 21 | 'BiDecoderOnlyEmbedderICLModel', 22 | 'DecoderOnlyEmbedderICLTrainer', 23 | 'DecoderOnlyEmbedderICLRunner', 24 | ] 25 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/decoder_only/icl/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.finetune.embedder.decoder_only.icl import ( 4 | DecoderOnlyEmbedderICLDataArguments, 5 | DecoderOnlyEmbedderICLTrainingArguments, 6 | DecoderOnlyEmbedderICLModelArguments, 7 | DecoderOnlyEmbedderICLRunner, 8 | ) 9 | 10 | 11 | def main(): 12 | parser = HfArgumentParser(( 13 | DecoderOnlyEmbedderICLModelArguments, 14 | DecoderOnlyEmbedderICLDataArguments, 15 | DecoderOnlyEmbedderICLTrainingArguments 16 | )) 17 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 18 | model_args: DecoderOnlyEmbedderICLModelArguments 19 | data_args: DecoderOnlyEmbedderICLDataArguments 20 | training_args: DecoderOnlyEmbedderICLTrainingArguments 21 | 22 | runner = DecoderOnlyEmbedderICLRunner( 23 | model_args=model_args, 24 | data_args=data_args, 25 | training_args=training_args 26 | ) 27 | runner.run() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/embedder/encoder_only/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/base/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.finetune.embedder import ( 2 | AbsEmbedderModelArguments as EncoderOnlyEmbedderModelArguments, 3 | AbsEmbedderDataArguments as EncoderOnlyEmbedderDataArguments, 4 | AbsEmbedderTrainingArguments as EncoderOnlyEmbedderTrainingArguments, 5 | ) 6 | 7 | from .modeling import BiEncoderOnlyEmbedderModel 8 | from .trainer import EncoderOnlyEmbedderTrainer 9 | from .runner import EncoderOnlyEmbedderRunner 10 | 11 | __all__ = [ 12 | 'EncoderOnlyEmbedderModelArguments', 13 | 'EncoderOnlyEmbedderDataArguments', 14 | 'EncoderOnlyEmbedderTrainingArguments', 15 | 'BiEncoderOnlyEmbedderModel', 16 | 'EncoderOnlyEmbedderTrainer', 17 | 'EncoderOnlyEmbedderRunner', 18 | ] 19 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/base/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.finetune.embedder.encoder_only.base import ( 4 | EncoderOnlyEmbedderDataArguments, 5 | EncoderOnlyEmbedderTrainingArguments, 6 | EncoderOnlyEmbedderModelArguments, 7 | 
EncoderOnlyEmbedderRunner, 8 | ) 9 | 10 | 11 | def main(): 12 | parser = HfArgumentParser(( 13 | EncoderOnlyEmbedderModelArguments, 14 | EncoderOnlyEmbedderDataArguments, 15 | EncoderOnlyEmbedderTrainingArguments 16 | )) 17 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 18 | model_args: EncoderOnlyEmbedderModelArguments 19 | data_args: EncoderOnlyEmbedderDataArguments 20 | training_args: EncoderOnlyEmbedderTrainingArguments 21 | 22 | runner = EncoderOnlyEmbedderRunner( 23 | model_args=model_args, 24 | data_args=data_args, 25 | training_args=training_args 26 | ) 27 | runner.run() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/m3/__init__.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding.abc.finetune.embedder import AbsEmbedderDataArguments as EncoderOnlyEmbedderM3DataArguments 2 | 3 | from .arguments import EncoderOnlyEmbedderM3ModelArguments, EncoderOnlyEmbedderM3TrainingArguments 4 | from .modeling import EncoderOnlyEmbedderM3Model, EncoderOnlyEmbedderM3ModelForInference 5 | from .trainer import EncoderOnlyEmbedderM3Trainer 6 | from .runner import EncoderOnlyEmbedderM3Runner 7 | 8 | 9 | __all__ = [ 10 | 'EncoderOnlyEmbedderM3ModelArguments', 11 | 'EncoderOnlyEmbedderM3DataArguments', 12 | 'EncoderOnlyEmbedderM3TrainingArguments', 13 | 'EncoderOnlyEmbedderM3Model', 14 | 'EncoderOnlyEmbedderM3ModelForInference', 15 | 'EncoderOnlyEmbedderM3Trainer', 16 | 'EncoderOnlyEmbedderM3Runner', 17 | ] 18 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/m3/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.finetune.embedder.encoder_only.m3 import ( 4 | EncoderOnlyEmbedderM3DataArguments, 5 | EncoderOnlyEmbedderM3TrainingArguments, 6 | EncoderOnlyEmbedderM3ModelArguments, 7 | EncoderOnlyEmbedderM3Runner, 8 | ) 9 | 10 | 11 | def main(): 12 | parser = HfArgumentParser((EncoderOnlyEmbedderM3ModelArguments, EncoderOnlyEmbedderM3DataArguments, EncoderOnlyEmbedderM3TrainingArguments)) 13 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 14 | model_args: EncoderOnlyEmbedderM3ModelArguments 15 | data_args: EncoderOnlyEmbedderM3DataArguments 16 | training_args: EncoderOnlyEmbedderM3TrainingArguments 17 | 18 | runner = EncoderOnlyEmbedderM3Runner( 19 | model_args=model_args, 20 | data_args=data_args, 21 | training_args=training_args 22 | ) 23 | runner.run() 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/embedder/encoder_only/m3/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from FlagEmbedding.abc.finetune.embedder import ( 4 | AbsEmbedderTrainingArguments, 5 | AbsEmbedderModelArguments 6 | ) 7 | 8 | 9 | @dataclass 10 | class EncoderOnlyEmbedderM3ModelArguments(AbsEmbedderModelArguments): 11 | """ 12 | Model argument class for M3. 
13 | """ 14 | colbert_dim: int = field(default=-1, metadata={"help": "Dim of colbert linear"}) 15 | 16 | 17 | @dataclass 18 | class EncoderOnlyEmbedderM3TrainingArguments(AbsEmbedderTrainingArguments): 19 | """ 20 | Training argument class for M3. 21 | """ 22 | unified_finetuning: bool = field(default=False, metadata={"help": "use unify fine-tuning"}) 23 | use_self_distill: bool = field(default=False, metadata={"help": "use self-distill when using unify fine-tuning"}) 24 | fix_encoder: bool = field(default=False, metadata={"help": "Freeze the parameters of encoder"}) 25 | self_distill_start_step: int = field(default=-1, metadata={"help": "Num of step when using self-distill"}) 26 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/reranker/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/decoder_only/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/reranker/decoder_only/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/decoder_only/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import CrossDecoderModel 2 | from .runner import DecoderOnlyRerankerRunner 3 | from .arguments import RerankerModelArguments 4 | from .trainer import DecoderOnlyRerankerTrainer 5 | 6 | __all__ = [ 7 | "CrossDecoderModel", 8 | "DecoderOnlyRerankerRunner", 9 | "DecoderOnlyRerankerTrainer", 10 | "RerankerModelArguments", 11 | ] 12 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/decoder_only/base/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.abc.finetune.reranker import ( 4 | AbsRerankerDataArguments, 5 | AbsRerankerTrainingArguments 6 | ) 7 | 8 | from FlagEmbedding.finetune.reranker.decoder_only.base import ( 9 | DecoderOnlyRerankerRunner, 10 | RerankerModelArguments 11 | ) 12 | 13 | 14 | def main(): 15 | parser = HfArgumentParser((RerankerModelArguments, AbsRerankerDataArguments, AbsRerankerTrainingArguments)) 16 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 17 | model_args: RerankerModelArguments 18 | data_args: AbsRerankerDataArguments 19 | training_args: AbsRerankerTrainingArguments 20 | 21 | runner = DecoderOnlyRerankerRunner( 22 | model_args=model_args, 23 | data_args=data_args, 24 | training_args=training_args 25 | ) 26 | runner.run() 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/decoder_only/layerwise/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import CrossDecoderModel 2 | from .runner import DecoderOnlyRerankerRunner 3 | from .arguments import RerankerModelArguments 4 | from .trainer import DecoderOnlyRerankerTrainer 5 | 6 | __all__ = [ 7 | 
"CrossDecoderModel", 8 | "DecoderOnlyRerankerRunner", 9 | "DecoderOnlyRerankerTrainer", 10 | "RerankerModelArguments", 11 | ] 12 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/decoder_only/layerwise/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.abc.finetune.reranker import ( 4 | AbsRerankerDataArguments, 5 | AbsRerankerTrainingArguments 6 | ) 7 | 8 | from FlagEmbedding.finetune.reranker.decoder_only.layerwise import ( 9 | DecoderOnlyRerankerRunner, 10 | RerankerModelArguments 11 | ) 12 | 13 | 14 | def main(): 15 | parser = HfArgumentParser((RerankerModelArguments, AbsRerankerDataArguments, AbsRerankerTrainingArguments)) 16 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 17 | model_args: RerankerModelArguments 18 | data_args: AbsRerankerDataArguments 19 | training_args: AbsRerankerTrainingArguments 20 | 21 | runner = DecoderOnlyRerankerRunner( 22 | model_args=model_args, 23 | data_args=data_args, 24 | training_args=training_args 25 | ) 26 | runner.run() 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/encoder_only/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/finetune/reranker/encoder_only/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/encoder_only/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import CrossEncoderModel 2 | from .runner import EncoderOnlyRerankerRunner 3 | from .trainer import EncoderOnlyRerankerTrainer 4 | 5 | __all__ = [ 6 | "CrossEncoderModel", 7 | "EncoderOnlyRerankerRunner", 8 | "EncoderOnlyRerankerTrainer" 9 | ] 10 | -------------------------------------------------------------------------------- /FlagEmbedding/finetune/reranker/encoder_only/base/__main__.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.abc.finetune.reranker import ( 4 | AbsRerankerModelArguments, 5 | AbsRerankerDataArguments, 6 | AbsRerankerTrainingArguments 7 | ) 8 | from FlagEmbedding.finetune.reranker.encoder_only.base import EncoderOnlyRerankerRunner 9 | 10 | 11 | def main(): 12 | parser = HfArgumentParser((AbsRerankerModelArguments, AbsRerankerDataArguments, AbsRerankerTrainingArguments)) 13 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 14 | model_args: AbsRerankerModelArguments 15 | data_args: AbsRerankerDataArguments 16 | training_args: AbsRerankerTrainingArguments 17 | 18 | runner = EncoderOnlyRerankerRunner( 19 | model_args=model_args, 20 | data_args=data_args, 21 | training_args=training_args 22 | ) 23 | runner.run() 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_embedder import FlagAutoModel 2 | from .auto_reranker import FlagAutoReranker 3 | from .embedder import ( 4 | FlagModel, 
BGEM3FlagModel, 5 | FlagICLModel, FlagLLMModel, 6 | EmbedderModelClass 7 | ) 8 | from .reranker import ( 9 | FlagReranker, 10 | FlagLLMReranker, LayerWiseFlagLLMReranker, LightWeightFlagLLMReranker, 11 | RerankerModelClass 12 | ) 13 | 14 | 15 | __all__ = [ 16 | "FlagAutoModel", 17 | "FlagAutoReranker", 18 | "EmbedderModelClass", 19 | "RerankerModelClass", 20 | "FlagModel", 21 | "BGEM3FlagModel", 22 | "FlagICLModel", 23 | "FlagLLMModel", 24 | "FlagReranker", 25 | "FlagLLMReranker", 26 | "LayerWiseFlagLLMReranker", 27 | "LightWeightFlagLLMReranker", 28 | ] 29 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/embedder/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoder_only import FlagModel, BGEM3FlagModel 2 | from .decoder_only import FlagICLModel, FlagLLMModel 3 | from .model_mapping import EmbedderModelClass 4 | 5 | __all__ = [ 6 | "FlagModel", 7 | "BGEM3FlagModel", 8 | "FlagICLModel", 9 | "FlagLLMModel", 10 | "EmbedderModelClass", 11 | ] 12 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/embedder/decoder_only/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLLMEmbedder as FlagLLMModel 2 | from .icl import ICLLLMEmbedder as FlagICLModel 3 | 4 | __all__ = [ 5 | "FlagLLMModel", 6 | "FlagICLModel", 7 | ] 8 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/embedder/encoder_only/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseEmbedder as FlagModel 2 | from .m3 import M3Embedder as BGEM3FlagModel 3 | 4 | __all__ = [ 5 | "FlagModel", 6 | "BGEM3FlagModel", 7 | ] 8 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/reranker/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder_only import FlagLLMReranker, LayerWiseFlagLLMReranker, LightWeightFlagLLMReranker 2 | from .encoder_only import FlagReranker 3 | from .model_mapping import RerankerModelClass 4 | 5 | __all__ = [ 6 | "FlagReranker", 7 | "FlagLLMReranker", 8 | "LayerWiseFlagLLMReranker", 9 | "LightWeightFlagLLMReranker", 10 | "RerankerModelClass", 11 | ] 12 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/reranker/decoder_only/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLLMReranker as FlagLLMReranker 2 | from .layerwise import LayerWiseLLMReranker as LayerWiseFlagLLMReranker 3 | from .lightweight import LightweightLLMReranker as LightWeightFlagLLMReranker 4 | 5 | __all__ = [ 6 | "FlagLLMReranker", 7 | "LayerWiseFlagLLMReranker", 8 | "LightWeightFlagLLMReranker" 9 | ] 10 | -------------------------------------------------------------------------------- /FlagEmbedding/inference/reranker/decoder_only/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/FlagEmbedding/inference/reranker/decoder_only/models/__init__.py -------------------------------------------------------------------------------- /FlagEmbedding/inference/reranker/encoder_only/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .base import BaseReranker as FlagReranker 2 | 3 | __all__ = [ 4 | "FlagReranker", 5 | ] 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 staoxiao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Manifest.in: -------------------------------------------------------------------------------- 1 | # Include the entire directory and its contents 2 | recursive-include FlagEmbedding/FlagEmbedding/visual/eva_clip * 3 | 4 | # Include the specific file at the root level 5 | include bpe_simple_vocab_16e6.txt.gz 6 | 7 | # Include all JSON files inside the specified directory 8 | recursive-include FlagEmbedding/visual/eva_clip/model_configs *.json 9 | -------------------------------------------------------------------------------- /Tutorials/7_Fine-tuning/config/ds_stage0.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 0 4 | }, 5 | 6 | "fp16": { 7 | "enabled": "auto", 8 | "loss_scale": 0, 9 | "loss_scale_window": 1000, 10 | "initial_scale_power": 12, 11 | "hysteresis": 2, 12 | "min_loss_scale": 1 13 | }, 14 | 15 | "bf16": { 16 | "enabled": "auto" 17 | }, 18 | 19 | "optimizer": { 20 | "type": "AdamW", 21 | "params": { 22 | "lr": "auto", 23 | "betas": "auto", 24 | "eps": "auto", 25 | "weight_decay": "auto" 26 | } 27 | }, 28 | 29 | "scheduler": { 30 | "type": "WarmupDecayLR", 31 | "params": { 32 | "warmup_min_lr": "auto", 33 | "warmup_max_lr": "auto", 34 | "warmup_num_steps": "auto", 35 | "total_num_steps": "auto" 36 | } 37 | }, 38 | 39 | "gradient_accumulation_steps": "auto", 40 | "gradient_clipping": "auto", 41 | "steps_per_print": 100, 42 | "train_batch_size": "auto", 43 | "train_micro_batch_size_per_gpu": "auto", 44 | "wall_clock_breakdown": false 45 | } -------------------------------------------------------------------------------- /Tutorials/tutorial_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/Tutorials/tutorial_map.png -------------------------------------------------------------------------------- 
/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | install the required pkgs: 2 | ``` 3 | pip install -r requirements.txt 4 | ``` 5 | 6 | 7 | to host the webpages locally: 8 | ``` 9 | python -m http.server 10 | ``` -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | myst-nb 3 | myst_parser 4 | sphinx-design 5 | pydata-sphinx-theme 6 | # furo -------------------------------------------------------------------------------- /docs/source/API/abc.rst: -------------------------------------------------------------------------------- 1 | Abstract Class 2 | ============== 3 | 4 | .. toctree:: 5 | abc/inference 6 | abc/evaluation 7 | abc/finetune -------------------------------------------------------------------------------- /docs/source/API/abc/evaluation.rst: -------------------------------------------------------------------------------- 1 | Evaluation 2 | ========== 3 | 4 | .. toctree:: 5 | evaluation/arguments 6 | evaluation/data_loader 7 | evaluation/searcher 8 | evaluation/evaluator 9 | evaluation/runner -------------------------------------------------------------------------------- /docs/source/API/abc/evaluation/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.abc.evaluation.AbsEvalArgs 5 | 6 | 7 | .. 
autoclass:: FlagEmbedding.abc.evaluation.AbsEvalModelArgs -------------------------------------------------------------------------------- /docs/source/API/abc/evaluation/evaluator.rst: -------------------------------------------------------------------------------- 1 | Evaluator 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.abc.evaluation.AbsEvaluator -------------------------------------------------------------------------------- /docs/source/API/abc/evaluation/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.abc.evaluation.AbsEvalRunner -------------------------------------------------------------------------------- /docs/source/API/abc/evaluation/searcher.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | searcher 3 | ======== 4 | 5 | EvalRetriever 6 | ============= 7 | 8 | .. autoclass:: FlagEmbedding.abc.evaluation.EvalRetriever 9 | 10 | EvalDenseRetriever 11 | ================== 12 | 13 | .. autoclass:: FlagEmbedding.abc.evaluation.EvalDenseRetriever 14 | 15 | EvalReranker 16 | ============ 17 | 18 | .. autoclass:: FlagEmbedding.abc.evaluation.EvalReranker -------------------------------------------------------------------------------- /docs/source/API/abc/finetune.rst: -------------------------------------------------------------------------------- 1 | Finetune 2 | ======== 3 | 4 | .. toctree:: 5 | finetune/embedder 6 | finetune/reranker -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/embedder.rst: -------------------------------------------------------------------------------- 1 | Embedder 2 | ======== 3 | 4 | .. toctree:: 5 | embedder/AbsArguments 6 | embedder/AbsDataset 7 | embedder/AbsModeling 8 | embedder/AbsTrainer 9 | embedder/AbsRunner -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/embedder/AbsArguments.rst: -------------------------------------------------------------------------------- 1 | AbsArguments 2 | ============ 3 | 4 | .. autoclass:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderModelArguments 5 | 6 | .. autoclass:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderDataArguments 7 | -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/embedder/AbsRunner.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | AbsRunner 3 | ========= 4 | 5 | AbsEmbedderRunner 6 | ================= 7 | 8 | .. autoclass:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner.load_tokenizer_and_model 14 | 15 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner.load_trainer 16 | 17 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner.load_train_dataset 18 | 19 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner.load_data_collator 20 | 21 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderRunner.run -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/embedder/AbsTrainer.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | AbsTrainer 3 | ========== 4 | 5 | AbsEmbedderTrainer 6 | ================== 7 | 8 | ..
autoclass:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderTrainer 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.embedder.AbsEmbedderTrainer.compute_loss -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker.rst: -------------------------------------------------------------------------------- 1 | Reranker 2 | ======== 3 | 4 | .. toctree:: 5 | reranker/AbsArguments 6 | reranker/AbsDataset 7 | reranker/AbsModeling 8 | reranker/AbsTrainer 9 | reranker/AbsRunner -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker/AbsArguments.rst: -------------------------------------------------------------------------------- 1 | AbsArguments 2 | ============ 3 | 4 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModelArguments 5 | 6 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerDataArguments 7 | -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker/AbsDataset.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | AbsDataset 3 | ========== 4 | 5 | AbsRerankerTrainDataset 6 | ======================= 7 | 8 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainDataset 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainDataset.create_one_example 14 | 15 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainDataset._load_dataset 16 | 17 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainDataset._shuffle_text 18 | 19 | AbsRerankerCollator 20 | =================== 21 | 22 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerCollator 23 | 24 | AbsLLMRerankerTrainDataset 25 | ========================== 26 | 27 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsLLMRerankerTrainDataset 28 | 29 | AbsLLMRerankerCollator 30 | ====================== 31 | 32 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsLLMRerankerCollator 33 | -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker/AbsModeling.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | AbsModeling 3 | =========== 4 | 5 | AbsRerankerModel 6 | ================ 7 | 8 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.encode 14 | 15 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.gradient_checkpointing_enable 16 | 17 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.enable_input_require_grads 18 | 19 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.forward 20 | 21 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.compute_loss 22 | 23 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.save 24 | 25 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerModel.save_pretrained 26 | 27 | 28 | RerankerOutput 29 | ============== 30 | 31 | ..
autoclass:: FlagEmbedding.abc.finetune.reranker.RerankerOutput -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker/AbsRunner.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | AbsRunner 3 | ========= 4 | 5 | AbsRerankerTrainer 6 | ================== 7 | 8 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner.load_tokenizer_and_model 14 | 15 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner.load_trainer 16 | 17 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner.load_train_dataset 18 | 19 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner.load_data_collator 20 | 21 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerRunner.run -------------------------------------------------------------------------------- /docs/source/API/abc/finetune/reranker/AbsTrainer.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | AbsTrainer 3 | ========== 4 | 5 | AbsRerankerTrainer 6 | ================== 7 | 8 | .. autoclass:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainer 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.abc.finetune.reranker.AbsRerankerTrainer.compute_loss -------------------------------------------------------------------------------- /docs/source/API/abc/inference.rst: -------------------------------------------------------------------------------- 1 | Inference 2 | ========= 3 | 4 | .. toctree:: 5 | inference/AbsEmbedder 6 | inference/AbsReranker -------------------------------------------------------------------------------- /docs/source/API/abc/inference/AbsEmbedder.rst: -------------------------------------------------------------------------------- 1 | AbsEmbedder 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.abc.inference.AbsEmbedder 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.get_target_devices 10 | 11 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.get_detailed_instruct 12 | 13 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.encode_queries 14 | 15 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.encode_corpus 16 | 17 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.encode 18 | 19 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.encode_single_device 20 | 21 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.start_multi_process_pool 22 | 23 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder._encode_multi_process_worker 24 | 25 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.stop_multi_process_pool 26 | 27 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder.encode_multi_process 28 | 29 | .. automethod:: FlagEmbedding.abc.inference.AbsEmbedder._concatenate_results_from_multi_process -------------------------------------------------------------------------------- /docs/source/API/abc/inference/AbsReranker.rst: -------------------------------------------------------------------------------- 1 | AbsReranker 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.abc.inference.AbsReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.get_target_devices 10 | 11 | .. 
automethod:: FlagEmbedding.abc.inference.AbsReranker.get_detailed_instruct 12 | 13 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.get_detailed_inputs 14 | 15 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.compute_score 16 | 17 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.compute_score_single_gpu 18 | 19 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.start_multi_process_pool 20 | 21 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.encode_multi_process 22 | 23 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker._encode_multi_process_worker 24 | 25 | .. automethod:: FlagEmbedding.abc.inference.AbsReranker.stop_multi_process_pool -------------------------------------------------------------------------------- /docs/source/API/evaluation.rst: -------------------------------------------------------------------------------- 1 | Evaluation 2 | ========== 3 | 4 | .. toctree:: 5 | evaluation/mteb 6 | evaluation/airbench 7 | evaluation/msmarco 8 | evaluation/beir 9 | evaluation/miracl 10 | evaluation/mkqa 11 | evaluation/mldr -------------------------------------------------------------------------------- /docs/source/API/evaluation/airbench/arguments.rst: -------------------------------------------------------------------------------- 1 | arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.air_bench.AIRBenchEvalModelArgs -------------------------------------------------------------------------------- /docs/source/API/evaluation/airbench/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.air_bench.AIRBenchEvalRunner -------------------------------------------------------------------------------- /docs/source/API/evaluation/beir/arguments.rst: -------------------------------------------------------------------------------- 1 | arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.beir.arguments.BEIREvalArgs -------------------------------------------------------------------------------- /docs/source/API/evaluation/beir/data_loader.rst: -------------------------------------------------------------------------------- 1 | data loader 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.beir.data_loader.BEIREvalDataLoader -------------------------------------------------------------------------------- /docs/source/API/evaluation/beir/evaluator.rst: -------------------------------------------------------------------------------- 1 | evaluator 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.beir.evaluator.BEIREvaluator -------------------------------------------------------------------------------- /docs/source/API/evaluation/beir/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.beir.BEIREvalRunner -------------------------------------------------------------------------------- /docs/source/API/evaluation/miracl/data_loader.rst: -------------------------------------------------------------------------------- 1 | data_loader 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader.available_dataset_names 10 | .. automethod:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader.available_splits 11 | .. 
automethod:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader._load_remote_corpus 12 | .. automethod:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader._load_remote_qrels 13 | .. automethod:: FlagEmbedding.evaluation.miracl.MIRACLEvalDataLoader._load_remote_queries -------------------------------------------------------------------------------- /docs/source/API/evaluation/miracl/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.miracl.MIRACLEvalRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/evaluation/mkqa/data_loader.rst: -------------------------------------------------------------------------------- 1 | data_loader 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader.available_dataset_names 10 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader.available_splits 11 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader.load_corpus 12 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader._load_local_qrels 13 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader._load_remote_corpus 14 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader._load_remote_qrels 15 | .. automethod:: FlagEmbedding.evaluation.mkqa.MKQAEvalDataLoader._load_remote_queries -------------------------------------------------------------------------------- /docs/source/API/evaluation/mkqa/evaluator.rst: -------------------------------------------------------------------------------- 1 | evaluator 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mkqa.MKQAEvaluator 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/evaluation/mkqa/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | .. autoclass:: FlagEmbedding.evaluation.mkqa.MKQAEvalRunner 4 | :members: -------------------------------------------------------------------------------- /docs/source/API/evaluation/mldr/data_loader.rst: -------------------------------------------------------------------------------- 1 | data_loader 2 | =========== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader.available_dataset_names 10 | .. automethod:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader.available_splits 11 | .. automethod:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader._load_remote_corpus 12 | .. automethod:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader._load_remote_qrels 13 | .. automethod:: FlagEmbedding.evaluation.mldr.MLDREvalDataLoader._load_remote_queries -------------------------------------------------------------------------------- /docs/source/API/evaluation/mldr/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mldr.MLDREvalRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/evaluation/msmarco/data_loader.rst: -------------------------------------------------------------------------------- 1 | data_loader 2 | =========== 3 | 4 | .. 
autoclass:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader.available_dataset_names 10 | .. automethod:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader.available_splits 11 | .. automethod:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader._load_remote_corpus 12 | .. automethod:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader._load_remote_qrels 13 | .. automethod:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalDataLoader._load_remote_queries -------------------------------------------------------------------------------- /docs/source/API/evaluation/msmarco/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.msmarco.MSMARCOEvalRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/evaluation/mteb/arguments.rst: -------------------------------------------------------------------------------- 1 | arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mteb.arguments.MTEBEvalArgs -------------------------------------------------------------------------------- /docs/source/API/evaluation/mteb/runner.rst: -------------------------------------------------------------------------------- 1 | runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mteb.runner.MTEBEvalRunner -------------------------------------------------------------------------------- /docs/source/API/evaluation/mteb/searcher.rst: -------------------------------------------------------------------------------- 1 | searcher 2 | ======== 3 | 4 | .. autoclass:: FlagEmbedding.evaluation.mteb.searcher.MTEBEvalDenseRetriever 5 | 6 | .. autoclass:: FlagEmbedding.evaluation.mteb.searcher.MTEBEvalReranker -------------------------------------------------------------------------------- /docs/source/API/finetune.rst: -------------------------------------------------------------------------------- 1 | Finetune 2 | ======== 3 | 4 | .. toctree:: 5 | finetune/embedder 6 | finetune/reranker -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder.rst: -------------------------------------------------------------------------------- 1 | Embedder 2 | ======== 3 | 4 | .. toctree:: 5 | embedder/encoder_only 6 | embedder/decoder_only -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only.rst: -------------------------------------------------------------------------------- 1 | Decoder Only 2 | ============ 3 | 4 | .. toctree:: 5 | decoder_only/base 6 | decoder_only/icl -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/base.rst: -------------------------------------------------------------------------------- 1 | Base 2 | ==== 3 | 4 | .. toctree:: 5 | base/arguments 6 | base/modeling 7 | base/runner 8 | base/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/base/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. 
autoclass:: FlagEmbedding.finetune.embedder.decoder_only.base.DecoderOnlyEmbedderModelArguments 5 | -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/base/modeling.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Modeling 3 | ======== 4 | 5 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel 6 | 7 | Methods 8 | ======= 9 | 10 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.encode 11 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.compute_score 12 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.compute_loss 13 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.gradient_checkpointing_enable 14 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.enable_input_require_grads 15 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel.save 16 | 17 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel._sentence_embedding 18 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.base.BiDecoderOnlyEmbedderModel._compute_similarity 19 | -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/base/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.base.DecoderOnlyEmbedderRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/base/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.base.DecoderOnlyEmbedderTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl.rst: -------------------------------------------------------------------------------- 1 | ICL 2 | === 3 | 4 | .. toctree:: 5 | icl/arguments 6 | icl/dataset 7 | icl/modeling 8 | icl/runner 9 | icl/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLModelArguments 5 | 6 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLDataArguments -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl/dataset.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Dataset 3 | ======= 4 | 5 | DecoderOnlyEmbedderICLSameDatasetTrainDataset 6 | ============================================= 7 | 8 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLSameDatasetTrainDataset 9 | 10 | Methods 11 | ------- 12 | 13 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLSameDatasetTrainDataset._create_batch_data 14 | 15 | AbsEmbedderSameDatasetCollator 16 | ============================== 17 | 18 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.AbsEmbedderSameDatasetCollator -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl/modeling.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Modeling 3 | ======== 4 | 5 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel 6 | 7 | Methods 8 | ======= 9 | 10 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.encode 11 | ..
automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.compute_score 12 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.compute_loss 13 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.gradient_checkpointing_enable 14 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.enable_input_require_grads 15 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel.save 16 | 17 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel._sentence_embedding 18 | .. automethod:: FlagEmbedding.finetune.embedder.decoder_only.icl.BiDecoderOnlyEmbedderICLModel._compute_similarity 19 | -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/decoder_only/icl/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.decoder_only.icl.DecoderOnlyEmbedderICLTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only.rst: -------------------------------------------------------------------------------- 1 | Encoder Only 2 | ============ 3 | 4 | .. toctree:: 5 | encoder_only/base 6 | encoder_only/m3 -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/base.rst: -------------------------------------------------------------------------------- 1 | Base 2 | ==== 3 | 4 | .. toctree:: 5 | base/modeling 6 | base/runner 7 | base/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/base/modeling.rst: -------------------------------------------------------------------------------- 1 | Modeling 2 | ======== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.encode 10 | 11 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.compute_score 12 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.compute_loss 13 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.gradient_checkpointing_enable 14 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.enable_input_require_grads 15 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel.save 16 | .. automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel._sentence_embedding 17 | .. 
automethod:: FlagEmbedding.finetune.embedder.encoder_only.base.BiEncoderOnlyEmbedderModel._compute_similarity -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/base/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.base.EncoderOnlyEmbedderRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/base/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.base.EncoderOnlyEmbedderTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/m3.rst: -------------------------------------------------------------------------------- 1 | M3 2 | == 3 | 4 | .. toctree:: 5 | m3/arguments 6 | m3/modeling 7 | m3/runner 8 | m3/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/m3/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3ModelArguments 5 | 6 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3TrainingArguments -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/m3/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3Runner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/embedder/encoder_only/m3/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3Trainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker.rst: -------------------------------------------------------------------------------- 1 | Reranker 2 | ======== 3 | 4 | .. toctree:: 5 | reranker/encoder_only 6 | reranker/decoder_only -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only.rst: -------------------------------------------------------------------------------- 1 | Decoder Only 2 | ============ 3 | 4 | .. toctree:: 5 | decoder_only/base 6 | decoder_only/layerwise -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/base.rst: -------------------------------------------------------------------------------- 1 | Base 2 | ==== 3 | 4 | .. 
toctree:: 5 | base/arguments 6 | base/modeling 7 | base/runner 8 | base/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/base/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.base.RerankerModelArguments 5 | -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/base/modeling.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Modeling 3 | ======== 4 | 5 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.base.CrossDecoderModel 6 | 7 | Methods 8 | ======= 9 | 10 | .. automethod:: FlagEmbedding.finetune.reranker.decoder_only.base.CrossDecoderModel.encode 11 | -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/base/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.base.DecoderOnlyRerankerRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/base/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.base.DecoderOnlyRerankerTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/layerwise.rst: -------------------------------------------------------------------------------- 1 | Layerwise 2 | ========= 3 | 4 | .. toctree:: 5 | layerwise/arguments 6 | layerwise/modeling 7 | layerwise/runner 8 | layerwise/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/layerwise/arguments.rst: -------------------------------------------------------------------------------- 1 | Arguments 2 | ========= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.RerankerModelArguments 5 | -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/layerwise/modeling.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Modeling 3 | ======== 4 | 5 | ..
autoclass:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.CrossDecoderModel 6 | 7 | Methods 8 | ======= 9 | 10 | .. automethod:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.CrossDecoderModel.encode 11 | .. automethod:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.CrossDecoderModel.forward 12 | -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/layerwise/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.DecoderOnlyRerankerRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/decoder_only/layerwise/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.decoder_only.layerwise.DecoderOnlyRerankerTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/encoder_only.rst: -------------------------------------------------------------------------------- 1 | Encoder Only 2 | ============ 3 | 4 | .. toctree:: 5 | encoder_only/base -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/encoder_only/base.rst: -------------------------------------------------------------------------------- 1 | Base 2 | ==== 3 | 4 | .. toctree:: 5 | base/modeling 6 | base/runner 7 | base/trainer -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/encoder_only/base/modeling.rst: -------------------------------------------------------------------------------- 1 | Modeling 2 | ======== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.encoder_only.base.CrossEncoderModel 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.finetune.reranker.encoder_only.base.CrossEncoderModel.encode 10 | -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/encoder_only/base/runner.rst: -------------------------------------------------------------------------------- 1 | Runner 2 | ====== 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.encoder_only.base.EncoderOnlyRerankerRunner 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/finetune/reranker/encoder_only/base/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. autoclass:: FlagEmbedding.finetune.reranker.encoder_only.base.EncoderOnlyRerankerTrainer 5 | :members: -------------------------------------------------------------------------------- /docs/source/API/index.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | abc 8 | inference 9 | evaluation 10 | finetune -------------------------------------------------------------------------------- /docs/source/API/inference.rst: -------------------------------------------------------------------------------- 1 | Inference 2 | ========= 3 | 4 | .. 
toctree:: 5 | inference/FlagAutoModel 6 | inference/FlagAutoReranker 7 | inference/embedder/embedder 8 | inference/reranker/reranker -------------------------------------------------------------------------------- /docs/source/API/inference/FlagAutoModel.rst: -------------------------------------------------------------------------------- 1 | FlagAutoModel 2 | ============= 3 | 4 | .. autoclass:: FlagEmbedding.inference.FlagAutoModel 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.FlagAutoModel.from_finetuned -------------------------------------------------------------------------------- /docs/source/API/inference/FlagAutoReranker.rst: -------------------------------------------------------------------------------- 1 | FlagAutoReranker 2 | ================ 3 | 4 | .. autoclass:: FlagEmbedding.inference.FlagAutoReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.FlagAutoReranker.from_finetuned -------------------------------------------------------------------------------- /docs/source/API/inference/embedder/decoder_only/BaseLLMEmbedder.rst: -------------------------------------------------------------------------------- 1 | BaseEmbedder 2 | ============ 3 | 4 | .. autoclass:: FlagEmbedding.inference.embedder.decoder_only.base.BaseLLMEmbedder 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.base.BaseLLMEmbedder.encode_queries 10 | 11 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.base.BaseLLMEmbedder.encode_corpus 12 | 13 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.base.BaseLLMEmbedder.encode 14 | 15 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.base.BaseLLMEmbedder.encode_single_device -------------------------------------------------------------------------------- /docs/source/API/inference/embedder/decoder_only/ICLLLMEmbedder.rst: -------------------------------------------------------------------------------- 1 | ICLLLMEmbedder 2 | ============== 3 | 4 | .. autoclass:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.encode_queries 10 | 11 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.encode_corpus 12 | 13 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.encode 14 | 15 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.set_examples 16 | 17 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.get_detailed_example 18 | 19 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.encode_queries_single_device 20 | 21 | .. automethod:: FlagEmbedding.inference.embedder.decoder_only.icl.ICLLLMEmbedder.encode_single_device -------------------------------------------------------------------------------- /docs/source/API/inference/embedder/embedder.rst: -------------------------------------------------------------------------------- 1 | Embedder 2 | ======== 3 | 4 | .. 
toctree:: 5 | encoder_only/BaseEmbedder 6 | encoder_only/M3Embedder 7 | decoder_only/BaseLLMEmbedder 8 | decoder_only/ICLLLMEmbedder -------------------------------------------------------------------------------- /docs/source/API/inference/embedder/encoder_only/BaseEmbedder.rst: -------------------------------------------------------------------------------- 1 | BaseEmbedder 2 | ============ 3 | 4 | .. autoclass:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder.encode_queries 10 | :no-index: 11 | 12 | .. automethod:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder.encode_corpus 13 | 14 | .. automethod:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder.encode 15 | 16 | .. automethod:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder.encode_single_device 17 | 18 | .. automethod:: FlagEmbedding.inference.embedder.encoder_only.base.BaseEmbedder.pooling -------------------------------------------------------------------------------- /docs/source/API/inference/reranker/decoder_only/BaseLLMReranker.rst: -------------------------------------------------------------------------------- 1 | BaseLLMReranker 2 | =============== 3 | 4 | .. autoclass:: FlagEmbedding.inference.reranker.decoder_only.base.BaseLLMReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.reranker.decoder_only.base.BaseLLMReranker.compute_score_single_gpu 10 | -------------------------------------------------------------------------------- /docs/source/API/inference/reranker/decoder_only/LayerWiseLLMReranker.rst: -------------------------------------------------------------------------------- 1 | LayerWiseLLMReranker 2 | ==================== 3 | 4 | .. autoclass:: FlagEmbedding.inference.reranker.decoder_only.layerwise.LayerWiseLLMReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.reranker.decoder_only.layerwise.LayerWiseLLMReranker.compute_score_single_gpu 10 | -------------------------------------------------------------------------------- /docs/source/API/inference/reranker/decoder_only/LightweightLLMReranker.rst: -------------------------------------------------------------------------------- 1 | LightweightLLMReranker 2 | ====================== 3 | 4 | .. autoclass:: FlagEmbedding.inference.reranker.decoder_only.lightweight.LightweightLLMReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.reranker.decoder_only.lightweight.LightweightLLMReranker.compute_score_single_gpu 10 | -------------------------------------------------------------------------------- /docs/source/API/inference/reranker/encoder_only/BaseReranker.rst: -------------------------------------------------------------------------------- 1 | BaseReranker 2 | ============ 3 | 4 | .. autoclass:: FlagEmbedding.inference.reranker.encoder_only.base.BaseReranker 5 | 6 | Methods 7 | ------- 8 | 9 | .. automethod:: FlagEmbedding.inference.reranker.encoder_only.base.BaseReranker.compute_score_single_gpu 10 | -------------------------------------------------------------------------------- /docs/source/API/inference/reranker/reranker.rst: -------------------------------------------------------------------------------- 1 | Reranker 2 | ======== 3 | 4 | ..
toctree:: 5 | encoder_only/BaseReranker 6 | decoder_only/BaseLLMReranker 7 | decoder_only/LayerWiseLLMReranker 8 | decoder_only/LightweightLLMReranker -------------------------------------------------------------------------------- /docs/source/Introduction/index.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | BGE builds a one-stop retrieval toolkit for search and RAG. We provide inference, evaluation, and fine-tuning for embedding models and rerankers. 5 | 6 | .. figure:: ../_static/img/RAG_pipeline.png 7 | :width: 700 8 | :align: center 9 | 10 | BGE embedder and reranker in a RAG pipeline. `Source `_ 11 | 12 | Quickly get started with: 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | :caption: Start 17 | 18 | overview 19 | installation 20 | quick_start 21 | 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | :caption: Concept 26 | 27 | IR 28 | embedder 29 | reranker 30 | similarity 31 | retrieval_demo -------------------------------------------------------------------------------- /docs/source/Introduction/overview.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | Our repository provides well-structured `APIs `_ for the inference, evaluation, and fine-tuning of BGE series models. 5 | Besides that, there are abundant resources such as examples and tutorials for users to quickly get hands-on experience. 6 | 7 | .. figure:: https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/refs/heads/master/imgs/projects.png 8 | :width: 700 9 | :align: center 10 | 11 | Structure of contents in our `repo `_ 12 | 13 | Our repository provides well-structured resources for information retrieval and RAG: 14 | 15 | - The core `APIs <../API>`_ for embedding models' inference, evaluation, and fine-tuning. 16 | - Hands-on `examples `_ for the three use cases mentioned above. 17 | - Detailed `tutorials `_ covering topics in retrieval to help you learn from scratch. -------------------------------------------------------------------------------- /docs/source/Introduction/quick_start.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | =========== 3 | 4 | First, load one of the BGE embedding models: 5 | 6 | .. code:: python 7 | 8 | from FlagEmbedding import FlagAutoModel 9 | 10 | model = FlagAutoModel.from_finetuned('BAAI/bge-base-en-v1.5') 11 | 12 | .. tip:: 13 | 14 | If you have difficulty connecting to Hugging Face, you can use the `HF mirror `_ instead. 15 | 16 | .. code:: bash 17 | 18 | export HF_ENDPOINT=https://hf-mirror.com 19 | 20 | Then, feed some sentences to the model and get their embeddings: 21 | 22 | .. code:: python 23 | 24 | sentences_1 = ["I love NLP", "I love machine learning"] 25 | sentences_2 = ["I love BGE", "I love text retrieval"] 26 | embeddings_1 = model.encode(sentences_1) 27 | embeddings_2 = model.encode(sentences_2) 28 | 29 | Once we have the embeddings, we can compute their similarity by inner product: 30 | 31 | ..
code:: python 32 | 33 | similarity = embeddings_1 @ embeddings_2.T 34 | print(similarity) 35 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .bd-sidebar-primary { 2 | width: 22%; 3 | line-height: 1.4; 4 | } 5 | 6 | .col-lg-3 { 7 | flex: 0 0 auto; 8 | width: 22%; 9 | } -------------------------------------------------------------------------------- /docs/source/_static/img/BAAI_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/BAAI_logo.png -------------------------------------------------------------------------------- /docs/source/_static/img/BGE_WeChat_Group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/BGE_WeChat_Group.png -------------------------------------------------------------------------------- /docs/source/_static/img/C_MTEB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/C_MTEB.png -------------------------------------------------------------------------------- /docs/source/_static/img/RAG_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/RAG_pipeline.png -------------------------------------------------------------------------------- /docs/source/_static/img/bge_logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/bge_logo.jpeg -------------------------------------------------------------------------------- /docs/source/_static/img/bge_panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/bge_panda.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/projects.png -------------------------------------------------------------------------------- /docs/source/_static/img/word2vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/docs/source/_static/img/word2vec.png -------------------------------------------------------------------------------- /docs/source/bge/index.rst: -------------------------------------------------------------------------------- 1 | BGE 2 | === 3 | 4 | .. figure:: ../_static/img/bge_logo.jpeg 5 | :width: 250 6 | :align: center 7 | 8 | **BGE** stands for **BAAI General Embeddings**, which is a series of embedding models released by BAAI. 9 | 10 | .. 
toctree:: 11 | :maxdepth: 1 12 | :caption: Embedder 13 | 14 | bge_v1_v1.5 15 | bge_m3 16 | bge_icl 17 | bge_vl 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | :caption: Reranker 22 | 23 | bge_reranker 24 | bge_reranker_v2 -------------------------------------------------------------------------------- /docs/source/community/index.rst: -------------------------------------------------------------------------------- 1 | Community 2 | ========= 3 | 4 | Visit our `GitHub repo `_ and 5 | `Hugging Face collection `_ for more materials! 6 | 7 | We also host WeChat groups for BGE. Scan the QR code to join the group chat! 8 | Join us now to get first-hand news about our updates and new releases, or to share any questions or ideas! 9 | 10 | .. figure:: ../_static/img/BGE_WeChat_Group.png 11 | :width: 400 12 | :align: center -------------------------------------------------------------------------------- /docs/source/tutorial/1_Embedding.rst: -------------------------------------------------------------------------------- 1 | 1. Embedding 2 | ============ 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Embedding 8 | 9 | 1_Embedding/1.1.1 10 | 1_Embedding/1.2.1 11 | 1_Embedding/1.2.2 12 | 1_Embedding/1.2.3 13 | 1_Embedding/1.2.4 14 | 1_Embedding/1.2.5 -------------------------------------------------------------------------------- /docs/source/tutorial/2_Metrics.rst: -------------------------------------------------------------------------------- 1 | 2. Metrics 2 | ========== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Metrics 8 | 9 | 2_Metrics/2.1 10 | 2_Metrics/2.2 -------------------------------------------------------------------------------- /docs/source/tutorial/3_Indexing.rst: -------------------------------------------------------------------------------- 1 | 3. Indexing 2 | =========== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Indexing 8 | 9 | 3_Indexing/3.1.1 10 | 3_Indexing/3.1.2 11 | 3_Indexing/3.1.3 12 | 3_Indexing/3.1.4 13 | 3_Indexing/3.1.5 -------------------------------------------------------------------------------- /docs/source/tutorial/4_Evaluation.rst: -------------------------------------------------------------------------------- 1 | 4. Evaluation 2 | ============= 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Evaluation 8 | 9 | 4_Evaluation/4.1.1 10 | 4_Evaluation/4.2.1 11 | 4_Evaluation/4.2.2 12 | 4_Evaluation/4.2.3 13 | 4_Evaluation/4.3.1 14 | 4_Evaluation/4.4.1 15 | 4_Evaluation/4.5.1 16 | 4_Evaluation/4.5.2 17 | -------------------------------------------------------------------------------- /docs/source/tutorial/5_Reranking.rst: -------------------------------------------------------------------------------- 1 | 5. Reranking 2 | ============ 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Reranking 8 | 9 | 5_Reranking/5.1 10 | 5_Reranking/5.2 11 | 5_Reranking/5.3 -------------------------------------------------------------------------------- /docs/source/tutorial/6_RAG.rst: -------------------------------------------------------------------------------- 1 | 6. RAG 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: RAG 8 | 9 | 6_RAG/6.1 10 | 6_RAG/6.2 11 | 6_RAG/6.3 -------------------------------------------------------------------------------- /docs/source/tutorial/7_Finetuning.rst: -------------------------------------------------------------------------------- 1 | 7. Finetuning 2 | ============= 3 | 4 | ..
toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Finetuning 8 | 9 | 7_Finetuning/7.1.1 10 | 7_Finetuning/7.1.2 11 | 7_Finetuning/7.1.3 12 | 7_Finetuning/7.2.1 -------------------------------------------------------------------------------- /docs/source/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | In this section, we provide hands-on introductions to different topics that are highly related to embedding models and retrieval. 5 | 6 | To run the tutorials, clone the GitHub repo and check the `Tutorials `_ folder. 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | :caption: Tutorials 11 | 12 | 1_Embedding 13 | 2_Metrics 14 | 3_Indexing 15 | 4_Evaluation 16 | 5_Reranking 17 | 6_RAG 18 | 7_Finetuning -------------------------------------------------------------------------------- /examples/evaluation/air_bench/eval_air_bench.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | eval_args="\ 6 | --benchmark_version AIR-Bench_24.05 \ 7 | --task_types qa long-doc \ 8 | --domains arxiv \ 9 | --languages en \ 10 | --splits dev test \ 11 | --output_dir ./air_bench/search_results \ 12 | --search_top_k 1000 --rerank_top_k 100 \ 13 | --cache_dir $HF_HUB_CACHE \ 14 | --overwrite False \ 15 | " 16 | 17 | model_args="\ 18 | --embedder_name_or_path BAAI/bge-m3 \ 19 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 20 | --devices cuda:0 cuda:1 \ 21 | --model_cache_dir $HF_HUB_CACHE \ 22 | --reranker_max_length 1024 \ 23 | " 24 | 25 | cmd="python -m FlagEmbedding.evaluation.air_bench \ 26 | $eval_args \ 27 | $model_args \ 28 | " 29 | 30 | echo $cmd 31 | eval $cmd 32 | -------------------------------------------------------------------------------- /examples/evaluation/beir/eval_beir.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | dataset_names="fiqa arguana cqadupstack" 6 | 7 | eval_args="\ 8 | --eval_name beir \ 9 | --dataset_dir ./beir/data \ 10 | --dataset_names $dataset_names \ 11 | --splits test dev \ 12 | --corpus_embd_save_dir ./beir/corpus_embd \ 13 | --output_dir ./beir/search_results \ 14 | --search_top_k 1000 --rerank_top_k 100 \ 15 | --cache_path $HF_HUB_CACHE \ 16 | --overwrite False \ 17 | --k_values 10 100 \ 18 | --eval_output_method markdown \ 19 | --eval_output_path ./beir/beir_eval_results.md \ 20 | --eval_metrics ndcg_at_10 recall_at_100 \ 21 | --ignore_identical_ids True \ 22 | " 23 | 24 | model_args="\ 25 | --embedder_name_or_path BAAI/bge-large-en-v1.5 \ 26 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 27 | --devices cuda:0 cuda:1 \ 28 | --cache_dir $HF_HUB_CACHE \ 29 | --reranker_max_length 1024 \ 30 | " 31 | 32 | cmd="python -m FlagEmbedding.evaluation.beir \ 33 | $eval_args \ 34 | $model_args \ 35 | " 36 | 37 | echo $cmd 38 | eval $cmd 39 | -------------------------------------------------------------------------------- /examples/evaluation/miracl/eval_miracl.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | dataset_names="bn hi sw te th yo" 6 | 7 | eval_args="\ 8 | --eval_name miracl \ 9 | --dataset_dir ./miracl/data \ 10 | --dataset_names $dataset_names \ 11 | --splits dev \ 12
| --corpus_embd_save_dir ./miracl/corpus_embd \ 13 | --output_dir ./miracl/search_results \ 14 | --search_top_k 1000 --rerank_top_k 100 \ 15 | --cache_path $HF_HUB_CACHE \ 16 | --overwrite False \ 17 | --k_values 10 100 \ 18 | --eval_output_method markdown \ 19 | --eval_output_path ./miracl/miracl_eval_results.md \ 20 | --eval_metrics ndcg_at_10 recall_at_100 \ 21 | " 22 | 23 | model_args="\ 24 | --embedder_name_or_path BAAI/bge-m3 \ 25 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 26 | --devices cuda:0 cuda:1 \ 27 | --cache_dir $HF_HUB_CACHE \ 28 | --reranker_max_length 1024 \ 29 | " 30 | 31 | cmd="python -m FlagEmbedding.evaluation.miracl \ 32 | $eval_args \ 33 | $model_args \ 34 | " 35 | 36 | echo $cmd 37 | eval $cmd 38 | -------------------------------------------------------------------------------- /examples/evaluation/mkqa/eval_mkqa.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | dataset_names="en zh_cn" 6 | 7 | eval_args="\ 8 | --eval_name mkqa \ 9 | --dataset_dir ./mkqa/data \ 10 | --dataset_names $dataset_names \ 11 | --splits test \ 12 | --corpus_embd_save_dir ./mkqa/corpus_embd \ 13 | --output_dir ./mkqa/search_results \ 14 | --search_top_k 1000 --rerank_top_k 100 \ 15 | --cache_path $HF_HUB_CACHE \ 16 | --overwrite False \ 17 | --k_values 20 \ 18 | --eval_output_method markdown \ 19 | --eval_output_path ./mkqa/mkqa_eval_results.md \ 20 | --eval_metrics qa_recall_at_20 \ 21 | " 22 | 23 | model_args="\ 24 | --embedder_name_or_path BAAI/bge-m3 \ 25 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 26 | --devices cuda:0 cuda:1 \ 27 | --cache_dir $HF_HUB_CACHE \ 28 | --reranker_max_length 1024 \ 29 | " 30 | 31 | cmd="python -m FlagEmbedding.evaluation.mkqa \ 32 | $eval_args \ 33 | $model_args \ 34 | " 35 | 36 | echo $cmd 37 | eval $cmd 38 | -------------------------------------------------------------------------------- /examples/evaluation/mldr/eval_mldr.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | dataset_names="hi" 6 | 7 | eval_args="\ 8 | --eval_name mldr \ 9 | --dataset_dir ./mldr/data \ 10 | --dataset_names $dataset_names \ 11 | --splits test \ 12 | --corpus_embd_save_dir ./mldr/corpus_embd \ 13 | --output_dir ./mldr/search_results \ 14 | --search_top_k 1000 --rerank_top_k 100 \ 15 | --cache_path $HF_HUB_CACHE \ 16 | --overwrite False \ 17 | --k_values 10 100 \ 18 | --eval_output_method markdown \ 19 | --eval_output_path ./mldr/mldr_eval_results.md \ 20 | --eval_metrics ndcg_at_10 \ 21 | " 22 | 23 | model_args="\ 24 | --embedder_name_or_path BAAI/bge-m3 \ 25 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 26 | --devices cuda:0 cuda:1 \ 27 | --cache_dir $HF_HUB_CACHE \ 28 | --embedder_passage_max_length 8192 \ 29 | --reranker_max_length 8192 \ 30 | " 31 | 32 | cmd="python -m FlagEmbedding.evaluation.mldr \ 33 | $eval_args \ 34 | $model_args \ 35 | " 36 | 37 | echo $cmd 38 | eval $cmd 39 | -------------------------------------------------------------------------------- /examples/evaluation/msmarco/eval_msmarco.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | dataset_names="passage" 6 | 7 | eval_args="\ 8 | --eval_name msmarco \ 9 | 
--dataset_dir ./msmarco/data \ 10 | --dataset_names $dataset_names \ 11 | --splits dev \ 12 | --corpus_embd_save_dir ./msmarco/corpus_embd \ 13 | --output_dir ./msmarco/search_results \ 14 | --search_top_k 1000 --rerank_top_k 100 \ 15 | --cache_path $HF_HUB_CACHE \ 16 | --overwrite True \ 17 | --k_values 10 100 \ 18 | --eval_output_method markdown \ 19 | --eval_output_path ./msmarco/msmarco_eval_results.md \ 20 | --eval_metrics ndcg_at_10 recall_at_100 \ 21 | " 22 | 23 | model_args="\ 24 | --embedder_name_or_path BAAI/bge-large-en-v1.5 \ 25 | --reranker_name_or_path BAAI/bge-reranker-v2-m3 \ 26 | --devices cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 \ 27 | --cache_dir $HF_HUB_CACHE \ 28 | --reranker_max_length 1024 \ 29 | " 30 | 31 | cmd="python -m FlagEmbedding.evaluation.msmarco \ 32 | $eval_args \ 33 | $model_args \ 34 | " 35 | 36 | echo $cmd 37 | eval $cmd 38 | -------------------------------------------------------------------------------- /examples/evaluation/mteb/eval_mteb.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$HF_HUB_CACHE" ]; then 2 | export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" 3 | fi 4 | 5 | languages="eng" 6 | tasks="NFCorpus BiorxivClusteringS2S SciDocsRR" 7 | 8 | eval_args="\ 9 | --eval_name mteb \ 10 | --output_dir ./mteb/search_results \ 11 | --languages $languages \ 12 | --tasks $tasks \ 13 | --eval_output_path ./mteb/mteb_eval_results.json 14 | " 15 | 16 | model_args="\ 17 | --embedder_name_or_path BAAI/bge-large-en-v1.5 \ 18 | --devices cuda:7 \ 19 | --cache_dir $HF_HUB_CACHE \ 20 | " 21 | 22 | cmd="python -m FlagEmbedding.evaluation.mteb \ 23 | $eval_args \ 24 | $model_args \ 25 | " 26 | 27 | echo $cmd 28 | eval $cmd 29 | -------------------------------------------------------------------------------- /examples/finetune/ds_stage0.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 0 4 | }, 5 | 6 | "fp16": { 7 | "enabled": "auto", 8 | "loss_scale": 0, 9 | "loss_scale_window": 1000, 10 | "initial_scale_power": 12, 11 | "hysteresis": 2, 12 | "min_loss_scale": 1 13 | }, 14 | 15 | "bf16": { 16 | "enabled": "auto" 17 | }, 18 | 19 | "optimizer": { 20 | "type": "AdamW", 21 | "params": { 22 | "lr": "auto", 23 | "betas": "auto", 24 | "eps": "auto", 25 | "weight_decay": "auto" 26 | } 27 | }, 28 | 29 | "scheduler": { 30 | "type": "WarmupDecayLR", 31 | "params": { 32 | "warmup_min_lr": "auto", 33 | "warmup_max_lr": "auto", 34 | "warmup_num_steps": "auto", 35 | "total_num_steps": "auto" 36 | } 37 | }, 38 | 39 | "gradient_accumulation_steps": "auto", 40 | "gradient_clipping": "auto", 41 | "steps_per_print": 100, 42 | "train_batch_size": "auto", 43 | "train_micro_batch_size_per_gpu": "auto", 44 | "wall_clock_breakdown": false 45 | } -------------------------------------------------------------------------------- /imgs/BGE_WeChat_Group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/BGE_WeChat_Group.png -------------------------------------------------------------------------------- /imgs/FlagOpen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/FlagOpen.png -------------------------------------------------------------------------------- 
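Note on the evaluation examples above: every script wires the same two-stage pipeline, dense retrieval over `--search_top_k` candidates followed by cross-encoder reranking of the `--rerank_top_k` best hits. The snippet below is a minimal, self-contained sketch of that flow using the FlagEmbedding inference classes, with the same `BAAI/bge-large-en-v1.5` embedder and `BAAI/bge-reranker-v2-m3` reranker as `eval_msmarco.sh`; the toy corpus, query, and brute-force dot-product search are illustrative stand-ins for the benchmark data and FAISS index the scripts actually use.

```python
import numpy as np
from FlagEmbedding import FlagModel, FlagReranker

# Toy stand-ins; the eval scripts load queries/corpus from --dataset_dir instead.
corpus = [
    "MS MARCO is a large-scale passage ranking benchmark.",
    "BGE rerankers score query-passage pairs with a cross-encoder.",
    "FAISS builds approximate nearest-neighbor indexes over embeddings.",
]
query = "What is MS MARCO?"

embedder = FlagModel(
    "BAAI/bge-large-en-v1.5",
    query_instruction_for_retrieval="Represent this sentence for searching relevant passages: ",
    use_fp16=True,
)
reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

# Stage 1: dense retrieval (the scripts do this with a FAISS index, keeping --search_top_k hits).
q_emb = embedder.encode_queries([query])
p_emb = embedder.encode_corpus(corpus)
dense_scores = (q_emb @ p_emb.T)[0]
candidates = np.argsort(dense_scores)[::-1][:2]

# Stage 2: cross-encoder reranking of the retrieved candidates (--rerank_top_k).
pairs = [[query, corpus[i]] for i in candidates]
rerank_scores = reranker.compute_score(pairs)
for score, idx in sorted(zip(rerank_scores, candidates), reverse=True):
    print(f"{score:.3f}  {corpus[idx]}")
```

The per-benchmark modules (`FlagEmbedding.evaluation.msmarco`, `.beir`, `.miracl`, ...) differ mainly in how they load data and which metrics they report; the `--embedder_name_or_path` / `--reranker_name_or_path` arguments in the scripts configure models equivalent to the two classes above.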
/imgs/bge_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/bge_logo.jpg -------------------------------------------------------------------------------- /imgs/cir_candi_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/cir_candi_1.png -------------------------------------------------------------------------------- /imgs/cir_candi_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/cir_candi_2.png -------------------------------------------------------------------------------- /imgs/cir_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/cir_query.png -------------------------------------------------------------------------------- /imgs/projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/imgs/projects.png -------------------------------------------------------------------------------- /research/BGE_Coder/evaluation/coderag_eval/eval.sh: -------------------------------------------------------------------------------- 1 | cd ./code-rag-bench/retrieval/ 2 | 3 | output_dir='result' 4 | 5 | for dataset_name in "humaneval" "mbpp" "repoeval" "ds1000_all_completion" "odex_en" "swe-bench-lite" 6 | do 7 | echo "dataset_name: ${dataset_name}" 8 | python main.py \ 9 | --embedder_name_or_path BAAI/bge-code-v1 \ 10 | --embedder_model_class decoder-only-base \ 11 | --query_instruction_format_for_retrieval '{}\n{}' \ 12 | --embedder_query_max_length 2048 \ 13 | --embedder_passage_max_length 2048 \ 14 | --trust_remote_code True \ 15 | --pooling_method last_token \ 16 | --embedder_batch_size 64 \ 17 | --devices cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 \ 18 | --cache_dir ./cache \ 19 | --dataset $dataset_name \ 20 | --output_file ../../${output_dir}/${dataset_name}_output.json \ 21 | --results_file ../../${output_dir}/${dataset_name}_results.json 22 | done -------------------------------------------------------------------------------- /research/BGE_Coder/evaluation/coderag_eval/prepare_data.sh: -------------------------------------------------------------------------------- 1 | cd ./code-rag-bench/retrieval/ 2 | 3 | for dataset_name in "humaneval" "mbpp" "live_code_bench" "ds1000" "odex" "repoeval_repo" "swebench_repo" 4 | do 5 | echo "dataset_name: ${dataset_name}" 6 | PYTHONPATH=./ python create/${dataset_name}.py 7 | done -------------------------------------------------------------------------------- /research/BGE_Coder/evaluation/coir_eval/eval.sh: -------------------------------------------------------------------------------- 1 | output_dir=result 2 | 3 | python main.py \ 4 | --output_dir ${output_dir} \ 5 | --use_special_instructions True \ 6 | --embedder_name_or_path BAAI/bge-code-v1 \ 7 | --embedder_model_class decoder-only-base \ 8 | --query_instruction_format_for_retrieval '{}\n{}' \ 9 | --embedder_query_max_length 2048 \ 10 | --embedder_passage_max_length 2048 \ 11 | 
--trust_remote_code True \ 12 | --pooling_method last_token \ 13 | --embedder_batch_size 64 \ 14 | --devices cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 \ 15 | --tasks apps codetrans-contest codetrans-dl cosqa synthetic-text2sql stackoverflow-qa codefeedback-mt codefeedback-st CodeSearchNet-ccr-go CodeSearchNet-ccr-java CodeSearchNet-ccr-javascript CodeSearchNet-ccr-php CodeSearchNet-ccr-python CodeSearchNet-ccr-ruby CodeSearchNet-go CodeSearchNet-java CodeSearchNet-javascript CodeSearchNet-php CodeSearchNet-python CodeSearchNet-ruby \ 16 | --cache_dir ./cache -------------------------------------------------------------------------------- /research/BGE_Coder/paper/CodeR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_Coder/paper/CodeR.pdf -------------------------------------------------------------------------------- /research/BGE_M3/BGE_M3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/BGE_M3.pdf -------------------------------------------------------------------------------- /research/BGE_M3/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import BGEM3Model, BGEM3ForInference, EncoderOutput 2 | from .trainer import BiTrainer -------------------------------------------------------------------------------- /research/BGE_M3/imgs/bm25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/bm25.jpg -------------------------------------------------------------------------------- /research/BGE_M3/imgs/long.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/long.jpg -------------------------------------------------------------------------------- /research/BGE_M3/imgs/miracl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/miracl.jpg -------------------------------------------------------------------------------- /research/BGE_M3/imgs/mkqa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/mkqa.jpg -------------------------------------------------------------------------------- /research/BGE_M3/imgs/nqa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/nqa.jpg -------------------------------------------------------------------------------- /research/BGE_M3/imgs/others.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_M3/imgs/others.webp -------------------------------------------------------------------------------- 
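The `research/BGE_M3` package shown above only re-exports the training-side classes (`BGEM3Model`, `BGEM3ForInference`, `BiTrainer`). For the released checkpoint, inference typically goes through `BGEM3FlagModel`, which exposes all three retrieval modes the model is trained for. A minimal sketch, assuming the public `BAAI/bge-m3` checkpoint and made-up sentences:

```python
from FlagEmbedding import BGEM3FlagModel

model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)

sentences = [
    "What retrieval modes does BGE-M3 support?",
    "BGE-M3 produces dense, sparse (lexical) and multi-vector representations.",
]

# One encode call can return all three representation types.
out = model.encode(
    sentences,
    return_dense=True,         # dense vectors for ANN / FAISS search
    return_sparse=True,        # per-token lexical weights (sparse retrieval)
    return_colbert_vecs=True,  # ColBERT-style multi-vector representations
)

dense_sim = out["dense_vecs"][0] @ out["dense_vecs"][1].T
lexical_sim = model.compute_lexical_matching_score(
    out["lexical_weights"][0], out["lexical_weights"][1]
)
colbert_sim = model.colbert_score(out["colbert_vecs"][0], out["colbert_vecs"][1])
print(dense_sim, lexical_sim, colbert_sim)
```

This is the same checkpoint the miracl/mkqa/mldr scripts pass as `--embedder_name_or_path BAAI/bge-m3`.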
/research/BGE_VL/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 JUNJIE99 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /research/BGE_VL/assets/cir_candi_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/cir_candi_1.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/cir_candi_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/cir_candi_2.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/cir_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/cir_query.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000032077.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000032077.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000050549.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000050549.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000098911.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000098911.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000156031.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000156031.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000244097.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000244097.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000272130.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000272130.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000275230.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000275230.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000311907.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000311907.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000357304.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000357304.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000478916.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000478916.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/corpus/000000545037.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/corpus/000000545037.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/query/000000530944.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/query/000000530944.jpg -------------------------------------------------------------------------------- /research/BGE_VL/assets/res-ft-mmeb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/res-ft-mmeb.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/res-scaling.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/res-scaling.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/res-zs-cir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/res-zs-cir.png -------------------------------------------------------------------------------- /research/BGE_VL/assets/res-zs-mmeb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL/assets/res-zs-mmeb.png -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/neg_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/neg_1.jpeg -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/neg_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/neg_2.jpeg -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/positive_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/positive_1.jpeg -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/positive_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/positive_2.jpeg -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/query_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/query_1.png -------------------------------------------------------------------------------- /research/BGE_VL_Screenshot/assets/query_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/BGE_VL_Screenshot/assets/query_2.png -------------------------------------------------------------------------------- /research/C_MTEB/C_MTEB/__init__.py: -------------------------------------------------------------------------------- 1 | # from .tasks import * 2 | from .tasks import * 3 | 4 | ChineseTaskList = [ 5 | 'TNews', 'IFlyTek', 'MultilingualSentiment', 'JDReview', 'OnlineShopping', 'Waimai', 6 | 'CLSClusteringS2S.v2', 'CLSClusteringP2P.v2', 'ThuNewsClusteringS2S.v2', 'ThuNewsClusteringP2P.v2', 7 | 'Ocnli', 'Cmnli', 8 | 'T2Reranking', 
'MMarcoReranking', 'CMedQAv1-reranking', 'CMedQAv2-reranking', 9 | 'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval', 'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval', 10 | 'ATEC', 'BQ', 'LCQMC', 'PAWSX', 'STSB', 'AFQMC', 'QBQTC' 11 | ] 12 | -------------------------------------------------------------------------------- /research/C_MTEB/C_MTEB/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .Classification import * 2 | from .Clustering import * 3 | from .PairClassification import * 4 | from .Reranking import * 5 | from .Retrieval import * 6 | from .STS import * 7 | -------------------------------------------------------------------------------- /research/C_MTEB/MKQA/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/C_MTEB/MKQA/utils/__init__.py -------------------------------------------------------------------------------- /research/C_MTEB/eval_cross_encoder.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from C_MTEB.tasks import * 4 | from mteb import MTEB 5 | 6 | from FlagEmbedding import FlagReranker 7 | 8 | 9 | def get_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--model_name_or_path', default="BAAI/bge-reranker-base", type=str) 12 | return parser.parse_args() 13 | 14 | 15 | 16 | if __name__ == '__main__': 17 | args = get_args() 18 | 19 | model = FlagReranker(args.model_name_or_path, use_fp16=True) 20 | 21 | if 'checkpoint-' in args.model_name_or_path: 22 | save_name = "_".join(args.model_name_or_path.split('/')[-2:]) 23 | else: 24 | save_name = "_".join(args.model_name_or_path.split('/')[-1:]) 25 | 26 | evaluation = MTEB(task_types=["Reranking"], task_langs=['zh', 'zh2en', 'en2zh']) 27 | evaluation.run(model, output_folder=f"reranker_results/{save_name}") 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /research/C_MTEB/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", mode="r", encoding="utf-8") as readme_file: 4 | readme = readme_file.read() 5 | 6 | setup( 7 | name='C_MTEB', 8 | version='1.1.1', 9 | description='Chinese Massive Text Embedding Benchmark', 10 | long_description=readme, 11 | long_description_content_type="text/markdown", 12 | author_email='2906698981@qq.com', 13 | url='https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB', 14 | packages=find_packages(), 15 | install_requires=[ 16 | 'mteb[beir]==1.1.1', 17 | ], 18 | ) 19 | -------------------------------------------------------------------------------- /research/LLARA/finetune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/LLARA/finetune/__init__.py -------------------------------------------------------------------------------- /research/LLARA/pretrain/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/LLARA/pretrain/__init__.py 
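The next entry is the `LM_Cocktail` package, whose `__init__.py` exports `mix_models`, `mix_models_with_data`, and `mix_models_by_layers` for weight-space merging of fine-tuned models. As a rough sketch of the intended call pattern (the local checkpoint path and the 0.5/0.5 weights are placeholders, and keyword names should be checked against the LM_Cocktail README):

```python
from LM_Cocktail import mix_models

# Merge a fine-tuned embedder back into its base model to limit catastrophic forgetting.
# "./my-finetuned-bge" is a hypothetical local checkpoint path.
mixed = mix_models(
    model_names_or_paths=["BAAI/bge-large-en-v1.5", "./my-finetuned-bge"],
    model_type="encoder",           # 'decoder' for causal language models
    weights=[0.5, 0.5],             # interpolation weights over the listed models
    output_path="./mixed_embedder", # merged weights are saved here
)
```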
-------------------------------------------------------------------------------- /research/LM_Cocktail/LM_Cocktail/__init__.py: -------------------------------------------------------------------------------- 1 | from .cocktail import mix_models, mix_models_with_data, mix_models_by_layers 2 | -------------------------------------------------------------------------------- /research/LM_Cocktail/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/LM_Cocktail/images/1.png -------------------------------------------------------------------------------- /research/LM_Cocktail/images/pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/LM_Cocktail/images/pic.png -------------------------------------------------------------------------------- /research/LM_Cocktail/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", mode="r", encoding="utf-8") as readme_file: 4 | readme = readme_file.read() 5 | 6 | setup( 7 | name='LM_Cocktail', 8 | version='0.0.5', 9 | description='LM_Cocktail', 10 | long_description=readme, 11 | long_description_content_type="text/markdown", 12 | author_email='2906698981@qq.com', 13 | url='https://github.com/FlagOpen/FlagEmbedding/LM_Cocktail', 14 | packages=find_packages(), 15 | install_requires=[ 16 | 'torch>=1.6.0', 17 | 'transformers>=4.18.0', 18 | 'datasets', 19 | 'accelerate>=0.20.1' 20 | ], 21 | ) 22 | -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/code.json: -------------------------------------------------------------------------------- 1 | { 2 | "mixture": { 3 | "commoncrawl": 10, 4 | "c4": 10, 5 | "github": 25, 6 | "book": 10, 7 | "arxiv": 10, 8 | "wiki": 10, 9 | "stackexchange": 25 10 | }, 11 | "num_tokens_avg": { 12 | "commoncrawl": 1207, 13 | "c4": 378, 14 | "wiki": 393, 15 | "stackexchange": 309, 16 | "github": 436, 17 | "book": 89373, 18 | "arxiv": 7375 19 | } 20 | } -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/even.json: -------------------------------------------------------------------------------- 1 | { 2 | "mixture": { 3 | "commoncrawl": 14.2, 4 | "c4": 14.2, 5 | "github": 14.2, 6 | "book": 14.2, 7 | "arxiv": 14.2, 8 | "wiki": 14.2, 9 | "stackexchange": 14.2 10 | }, 11 | "num_tokens_avg": { 12 | "commoncrawl": 1207, 13 | "c4": 378, 14 | "wiki": 393, 15 | "stackexchange": 309, 16 | "github": 436, 17 | "book": 89373, 18 | "arxiv": 7375 19 | } 20 | } -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/fsdp-offload.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | distributed_type: FSDP 4 | downcast_bf16: 'no' 5 | fsdp_config: 6 | fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP 7 | fsdp_backward_prefetch: BACKWARD_PRE 8 | fsdp_cpu_ram_efficient_loading: true 9 | fsdp_forward_prefetch: false 10 | fsdp_offload_params: false 11 | fsdp_sharding_strategy: FULL_SHARD 12 | fsdp_state_dict_type: FULL_STATE_DICT 13 | 
fsdp_sync_module_states: true 14 | fsdp_use_orig_params: true 15 | machine_rank: 0 16 | main_training_function: main 17 | mixed_precision: bf16 18 | num_machines: 1 19 | num_processes: 8 20 | rdzv_backend: static 21 | same_network: true 22 | tpu_env: [] 23 | tpu_use_cluster: false 24 | tpu_use_sudo: false 25 | use_cpu: false 26 | -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/fsdp.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | distributed_type: FSDP 4 | downcast_bf16: 'no' 5 | fsdp_config: 6 | fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP 7 | fsdp_backward_prefetch: BACKWARD_PRE 8 | fsdp_cpu_ram_efficient_loading: false 9 | fsdp_forward_prefetch: false 10 | fsdp_offload_params: false 11 | fsdp_sharding_strategy: FULL_SHARD 12 | fsdp_state_dict_type: FULL_STATE_DICT 13 | fsdp_sync_module_states: true 14 | fsdp_use_orig_params: true 15 | machine_rank: 0 16 | main_training_function: main 17 | mixed_precision: bf16 18 | num_machines: 1 19 | num_processes: 8 20 | rdzv_backend: static 21 | same_network: true 22 | tpu_env: [] 23 | tpu_use_cluster: false 24 | tpu_use_sudo: false 25 | use_cpu: false 26 | -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/slimpajama.json: -------------------------------------------------------------------------------- 1 | { 2 | "mixture": { 3 | "commoncrawl": 52.2, 4 | "c4": 26.7, 5 | "github": 5.2, 6 | "book": 4.2, 7 | "arxiv": 4.6, 8 | "wiki": 3.8, 9 | "stackexchange": 3.3 10 | }, 11 | "num_tokens_avg": { 12 | "commoncrawl": 1207, 13 | "c4": 378, 14 | "wiki": 393, 15 | "stackexchange": 309, 16 | "github": 436, 17 | "book": 89373, 18 | "arxiv": 7375 19 | } 20 | } -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/zero3-infer-offload.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | deepspeed_config: 4 | gradient_accumulation_steps: 1 5 | offload_optimizer_device: cpu 6 | offload_param_device: cpu 7 | zero3_init_flag: false 8 | zero3_save_16bit_model: true 9 | zero_stage: 3 10 | distributed_type: DEEPSPEED 11 | downcast_bf16: 'no' 12 | machine_rank: 0 13 | main_training_function: main 14 | mixed_precision: bf16 15 | num_machines: 1 16 | num_processes: 8 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: false 23 | -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/data/config/zero3-infer.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | deepspeed_config: 4 | gradient_accumulation_steps: 1 5 | offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: false 8 | zero3_save_16bit_model: true 9 | zero_stage: 3 10 | distributed_type: DEEPSPEED 11 | downcast_bf16: 'no' 12 | machine_rank: 0 13 | main_training_function: main 14 | mixed_precision: bf16 15 | num_machines: 1 16 | num_processes: 8 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: false 23 | 
-------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/src/llama/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_llama import LlamaForCausalLM 2 | from .configuration_llama import LlamaConfig -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/src/mistral/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_mistral import MistralForCausalLM 2 | from .configuration_mistral import MistralConfig -------------------------------------------------------------------------------- /research/Long_LLM/activation_beacon/src/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_qwen2 import Qwen2ForCausalLM 2 | from .configuration_qwen2 import Qwen2Config -------------------------------------------------------------------------------- /research/Long_LLM/longllm_qlora/data_pipeline/data/README.md: -------------------------------------------------------------------------------- 1 | This dictionary is used for saving processed data and results. -------------------------------------------------------------------------------- /research/Long_LLM/longllm_qlora/data_pipeline/raw_data/README.md: -------------------------------------------------------------------------------- 1 | This dictionary is used for saving raw data. -------------------------------------------------------------------------------- /research/Long_LLM/longllm_qlora/imgs/needle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/Long_LLM/longllm_qlora/imgs/needle.png -------------------------------------------------------------------------------- /research/MLVU/figs/statistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/MLVU/figs/statistic.png -------------------------------------------------------------------------------- /research/MLVU/figs/task_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/MLVU/figs/task_example.png -------------------------------------------------------------------------------- /research/Matroyshka_reranker/finetune/compensation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/Matroyshka_reranker/finetune/compensation/__init__.py -------------------------------------------------------------------------------- /research/Matroyshka_reranker/finetune/self_distillation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/Matroyshka_reranker/finetune/self_distillation/__init__.py -------------------------------------------------------------------------------- /research/Matroyshka_reranker/inference/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/Matroyshka_reranker/inference/__init__.py -------------------------------------------------------------------------------- /research/Matroyshka_reranker/requirements.txt: -------------------------------------------------------------------------------- 1 | tiktoken==0.6.0 2 | tornado==6.4 3 | langchain_openai==0.0.6 4 | rapidfuzz==3.6.1 5 | sql_metadata==2.10.0 6 | func_timeout==4.3.5 7 | pandas==2.2.1 8 | sqlglot==22.1.1 9 | rank_bm25==0.2.2 10 | peft==0.10.0 11 | transformers==4.41.1 12 | jinja2 13 | datasets 14 | sentencepiece 15 | flash-attn 16 | modelscope 17 | deepspeed 18 | bitsandbytes -------------------------------------------------------------------------------- /research/Reinforced_IR/data_generation/agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .gpt import GPTAgent 2 | from .vllm import LLMAgent 3 | from .vllm_instruct import LLMInstructAgent -------------------------------------------------------------------------------- /research/Reinforced_IR/data_generation/prompts/train_prompts.py: -------------------------------------------------------------------------------- 1 | generate_train_answer = """Please generate a brief answer to the given query according to the reference passage. 2 | 3 | Query: {query} 4 | 5 | Reference passage: {passage} 6 | 7 | Answer: """ 8 | 9 | generate_train_query = """Please generate a concise query from the following corpus. 10 | 11 | Corpus: {passage} 12 | 13 | Query: """ 14 | 15 | generate_train_query_type2 = """Generate a concise query using the key terms based on the following corpus. 16 | 17 | Corpus: {passage} 18 | 19 | Concise query: """ 20 | 21 | # The query is a user query and should be short. -------------------------------------------------------------------------------- /research/Reinforced_IR/finetune/generator/save_tokenizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import json 4 | import copy 5 | 6 | from transformers import AutoTokenizer 7 | 8 | 9 | def parse_option(): 10 | parser = argparse.ArgumentParser("") 11 | 12 | parser.add_argument('--model_path', type=str, default=None) 13 | parser.add_argument('--output_path', type=str, default=None) 14 | 15 | opt = parser.parse_args() 16 | 17 | return opt 18 | 19 | 20 | def main(opt): 21 | model_path = opt.model_path 22 | output_path = opt.output_path 23 | 24 | tokenizer = AutoTokenizer.from_pretrained(model_path) 25 | tokenizer.save_pretrained(output_path) 26 | 27 | 28 | if __name__ == "__main__": 29 | opt = parse_option() 30 | main(opt) -------------------------------------------------------------------------------- /research/Reinforced_IR/finetune/retriever/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from FlagEmbedding.abc.finetune.embedder import AbsEmbedderDataArguments 3 | 4 | from FlagEmbedding.abc.finetune.embedder import ( 5 | AbsEmbedderTrainingArguments, 6 | AbsEmbedderModelArguments 7 | ) 8 | 9 | 10 | @dataclass 11 | class IREmbedderTrainingArguments(AbsEmbedderTrainingArguments): 12 | """ 13 | Training argument class for M3. 
14 | """ 15 | training_type: str = field(default='retrieval_answer', metadata={"help": "whether to use answer"}) 16 | answer_temperature: float = field(default=None, metadata={"help": "temperature for answer"}) 17 | normalize_answer: bool = field(default=True, metadata={"help": "normalize answer"}) 18 | 19 | @dataclass 20 | class IREmbedderDataArguments(AbsEmbedderDataArguments): 21 | """ 22 | Data argument class for M3. 23 | """ 24 | answer_inbatch: bool = field(default=False) 25 | -------------------------------------------------------------------------------- /research/Reinforced_IR/finetune/retriever/run.py: -------------------------------------------------------------------------------- 1 | from transformers import HfArgumentParser 2 | 3 | from FlagEmbedding.abc.finetune.embedder import AbsEmbedderModelArguments 4 | from runner import IREmbedderRunner 5 | from arguments import IREmbedderTrainingArguments, IREmbedderDataArguments 6 | 7 | 8 | if __name__ == '__main__': 9 | parser = HfArgumentParser((AbsEmbedderModelArguments, IREmbedderDataArguments, IREmbedderTrainingArguments)) 10 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 11 | model_args: AbsEmbedderModelArguments 12 | data_args: IREmbedderDataArguments 13 | training_args: IREmbedderTrainingArguments 14 | 15 | runner = IREmbedderRunner( 16 | model_args=model_args, 17 | data_args=data_args, 18 | training_args=training_args 19 | ) 20 | runner.run() 21 | -------------------------------------------------------------------------------- /research/Reinforced_IR/inference/agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .gpt import GPTAgent 2 | from .vllm import LLMAgent 3 | from .vllm_instruct import LLMInstructAgent -------------------------------------------------------------------------------- /research/Reinforced_IR/requirements.txt: -------------------------------------------------------------------------------- 1 | FlagEmbedding 2 | vllm==0.7.1 3 | jinja2 4 | datasets 5 | sentencepiece 6 | modelscope 7 | peft 8 | deepspeed 9 | bitsandbytes -------------------------------------------------------------------------------- /research/baai_general_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/baai_general_embedding/__init__.py -------------------------------------------------------------------------------- /research/baai_general_embedding/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import BiEncoderModel, EncoderOutput 2 | from .trainer import BiTrainer 3 | -------------------------------------------------------------------------------- /research/baai_general_embedding/retromae_pretrain/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/AmazonCounterfactualClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "I wish I could have used this head set but the day I received it it wouldn't even turn on and I really wanted this product to work I'm very disappointed.", 4 | "response": "counterfactual" 5 | }, 6 | { 7 | "query": "I would advise that instead 
of trying to follow these poor instructions, Google it.", 8 | "response": "not-counterfactual" 9 | }, 10 | { 11 | "query": "I wrote to Monster customer service before ordering and they told me it would be fine to use without a converter and it was absolutely true.", 12 | "response": "not-counterfactual" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/AmazonPolarityClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Hunting the Hard Way Thia was a gift for my Husband, who loved the book. It arrived on the date we were told it would.", 4 | "response": "positive" 5 | }, 6 | { 7 | "query": "Poor DVD Has too many interviews with people at the Live THomas day in Penn. My kids were annoyed and hated this DVD.", 8 | "response": "negative" 9 | }, 10 | { 11 | "query": "Ludicrous and silly I remember getting this book so faintly that that says alot about my opinion of it. Basically, while I will entertain lots of odd ideas and theories, this book was basically silly.", 12 | "response": "negative" 13 | }, 14 | { 15 | "query": "Artistry I think that the Deodato concerts are very rich, as he used real strings and band musicians, as well as you can appreciate the John Tropea excelent renditions on guitar.", 16 | "response": "positive" 17 | } 18 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/ArxivClusteringS2S.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "A Survey on Graph Neural Networks: Algorithms and Applications", 4 | "response": "cs" 5 | }, 6 | { 7 | "query": "Hamiltonian Dynamics and KAM Theory for Infinite-Dimensional Systems", 8 | "response": "math" 9 | }, 10 | { 11 | "query": "Dark Matter Distribution in Dwarf Spheroidal Galaxies: Constraints from Stellar Kinematics", 12 | "response": "astro-ph" 13 | }, 14 | { 15 | "query": "Decoherence and Quantum Error Correction in Topological Quantum Computers", 16 | "response": "quant-ph" 17 | }, 18 | { 19 | "query": "Spin-Orbit Coupling Effects in Low-Dimensional Quantum Materials", 20 | "response": "cond-mat" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/BIOSSES.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Recent studies have highlighted the crucial role of p53 in regulating cell cycle progression.", 4 | "response": "Recent research underscores p53's pivotal function in controlling cellular division." 5 | }, 6 | { 7 | "query": "Neuroscience has revealed intricate pathways linking dopamine to reward and motivation.", 8 | "response": "Recent neuroscientific findings have illuminated complex dopamine pathways associated with motivation and reward." 9 | }, 10 | { 11 | "query": "Stem cell research holds promise for treating a variety of degenerative diseases.", 12 | "response": "The potential of stem cell research in combating degenerative illnesses is widely recognized." 
13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/Banking77Classification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "What is my money worth in other countries?", 4 | "response": "exchange_rate" 5 | }, 6 | { 7 | "query": "What can I do if my card still hasn't arrived after 2 weeks?", 8 | "response": "card_arrival" 9 | }, 10 | { 11 | "query": "Would I be able to open an account for my daughter?", 12 | "response": "age_limit" 13 | }, 14 | { 15 | "query": "My address details have changed and I want to update them", 16 | "response": "edit_personal_details" 17 | }, 18 | { 19 | "query": "If my cash withdrawal is still not showing, is something wrong?", 20 | "response": "pending_cash_withdrawal" 21 | }, 22 | { 23 | "query": "How long do transfers typically take? Is there a way of speeding the process up? My friend needs the money I sent her desperately.", 24 | "response": "transfer_not_received_by_recipient" 25 | } 26 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/BiorxivClusteringS2S.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Neural Circuit Dynamics in Decision-Making: A Computational Model of Prefrontal-Striatal Interactions", 4 | "response": "neuroscience" 5 | }, 6 | { 7 | "query": "Metagenomic Insights into Extreme Environments: Microbial Diversity and Functional Adaptations in Antarctic Lakes", 8 | "response": "microbiology" 9 | }, 10 | { 11 | "query": "Machine Learning Approaches for Predicting Protein Structure and Function from Sequence Data", 12 | "response": "bioinformatics" 13 | }, 14 | { 15 | "query": "Regulation of Stem Cell Fate Decisions by the Hippo Signaling Pathway: Implications for Tissue Regeneration and Cancer Therapy", 16 | "response": "cell biology" 17 | }, 18 | { 19 | "query": "Optical Tweezers and Single-Molecule Force Spectroscopy: Probing Protein Folding Dynamics and Mechanical Properties of Biomolecules", 20 | "response": "biophysics" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/CQADupstackRetrieval.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "angularjs infinite scroll in a container", 4 | "response": "AngularJS ng-infinite-scroll not working on a specific container/div" 5 | }, 6 | { 7 | "query": "Java: Efficiently converting an array of longs to an array of bytes", 8 | "response": "Most Compact way to Serialize an Array of Longs in Java" 9 | }, 10 | { 11 | "query": "PyVISA missing methods", 12 | "response": "NI VISA + pyVisa on Mac OS X (Snow Leopard)" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/EmotionClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "i am bothered is that he might changed his feelings once he get back in us and leave me heartbroken", 4 | "response": "sadness" 5 | }, 6 | { 7 | "query": "i have always loved my jobs and loved to work and i truly feel like being back there with my patients and co workers will do me a lot of good even if it is only for a few weeks", 8 | 
"response": "joy" 9 | }, 10 | { 11 | "query": "i certainly feel loved and appreciated and grateful for all that i have", 12 | "response": "love" 13 | }, 14 | { 15 | "query": "im grabbing a minute to post i feel greedy wrong", 16 | "response": "anger" 17 | }, 18 | { 19 | "query": "i was stymied a little bit as i wrote feeling unsure that i might go somewhere with the story unintended", 20 | "response": "fear" 21 | }, 22 | { 23 | "query": "i keep feeling pleasantly surprised at his supportiveness and also his ease in new situations", 24 | "response": "surprise" 25 | } 26 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MTOPDomainClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "I am no longer available", 4 | "response": "calling" 5 | }, 6 | { 7 | "query": "Cancel my reminder about my dentist appointment", 8 | "response": "reminder" 9 | }, 10 | { 11 | "query": "Will it rain tomorrow?", 12 | "response": "weather" 13 | }, 14 | { 15 | "query": "Create an appointment alarm for 11:30am.", 16 | "response": "allarm" 17 | }, 18 | { 19 | "query": "Play a different playlist", 20 | "response": "music" 21 | }, 22 | { 23 | "query": "What's the best way to fry chicken", 24 | "response": "recipes" 25 | }, 26 | { 27 | "query": "what city does Ahmed live in ?", 28 | "response": "people" 29 | } 30 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MTOPIntentClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "When will my next alarm start", 4 | "response": "GET_ALARM" 5 | }, 6 | { 7 | "query": "I need you to message Zachary Fletcher", 8 | "response": "SEND_MESSAGE" 9 | }, 10 | { 11 | "query": "show me video messages from Atlas", 12 | "response": "GET_MESSAGE" 13 | }, 14 | { 15 | "query": "I want to listen to AC/DC please", 16 | "response": "PLAY_MUSIC" 17 | }, 18 | { 19 | "query": "Make an alarm for the next 7 weeks for Thursday at 6pm", 20 | "response": "CREATE_ALARM" 21 | }, 22 | { 23 | "query": "fairs happening in ann arbor next week", 24 | "response": "GET_EVENT" 25 | }, 26 | { 27 | "query": "Will we get a frost this week?", 28 | "response": "GET_WEATHER" 29 | } 30 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MassiveIntentClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "remind me to pay rent every month", 4 | "response": "calendar_set" 5 | }, 6 | { 7 | "query": "please play yesterday from beatles", 8 | "response": "play_music" 9 | }, 10 | { 11 | "query": "what will the temperatures be for the next week", 12 | "response": "weather_query" 13 | }, 14 | { 15 | "query": "give me the detailed schedule for next week", 16 | "response": "calendar_query" 17 | }, 18 | { 19 | "query": "what's happening in my day", 20 | "response": "general_quirky" 21 | }, 22 | { 23 | "query": "dolores how was your day", 24 | "response": "general_quirky" 25 | }, 26 | { 27 | "query": "who was appointed as deputy centimeter of uttar pradesh", 28 | "response": "qa_factoid" 29 | }, 30 | { 31 | "query": "find me news about trumps speech", 32 | "response": "news_query" 33 | } 34 | ] 
-------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MassiveScenarioClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "can you confirm that my meeting for tomorrow has been canceled", 4 | "response": "calendar" 5 | }, 6 | { 7 | "query": "please open my music application and play games by disturbed", 8 | "response": "play" 9 | }, 10 | { 11 | "query": "what's the word orange mean", 12 | "response": "qa" 13 | }, 14 | { 15 | "query": "find me all mails from magda with holidays word in the title", 16 | "response": "email" 17 | }, 18 | { 19 | "query": "get a cup of coffee ready now", 20 | "response": "iot" 21 | }, 22 | { 23 | "query": "good morning olly", 24 | "response": "general" 25 | } 26 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MedrxivClusteringS2S.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Longitudinal Analysis of SARS-CoV-2 Neutralizing Antibody Titers and Viral Load in Asymptomatic and Symptomatic Patients", 4 | "response": "infectious diseases" 5 | }, 6 | { 7 | "query": "Impact of Public Health Messaging and Community Engagement on Vaccination Uptake During the COVID-19 Pandemic", 8 | "response": "epidemiology" 9 | }, 10 | { 11 | "query": "Long-term Effects of Ambient Temperature on COPD Hospitalizations: A Population-based Analysis in Northern Europe", 12 | "response": "public and global health" 13 | }, 14 | { 15 | "query": "Genomic Landscape of Rare Genetic Disorders Revealed through Whole-Exome Sequencing in Pediatric Populations", 16 | "response": "genetic and genomic medicine" 17 | }, 18 | { 19 | "query": "Impact of Gut Microbiota on Neuroinflammation and Cognitive Function in Multiple Sclerosis Patients: A Prospective Study", 20 | "response": "neurology" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/MindSmallReranking.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "'Wheel Of Fortune' Guest Delivers Hilarious, Off The Rails Introduction", 4 | "response": "Charles Rogers, former Michigan State football, Detroit Lions star, dead at 38" 5 | }, 6 | { 7 | "query": "Eliud Kipchoge runs 1:59 marathon, first to break 2 hours", 8 | "response": "AP-NORC poll: Many youths say high school diploma is enough" 9 | } 10 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/QuoraRetrieval.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Why do people say Dhanush (South Indian actor) is ugly? I don't think so.?", 4 | "response": "Why do people say Dhanush (South Indian actor) is ugly? I don't think so?" 5 | }, 6 | { 7 | "query": "What are some hit and nice ideas about architecture dissertation topics?", 8 | "response": "What are some interesting undergraduate architecture thesis topics?" 9 | }, 10 | { 11 | "query": "Could someone please motivate me?", 12 | "response": "Can you motivate me?" 
13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/RedditClustering.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Financial Meltdown: Strategies for Surviving Economic Collapse", 4 | "response": "collapse.txt" 5 | }, 6 | { 7 | "query": "Exclusive Comic Book Sale: Don't Miss Out on January 13th!", 8 | "response": "comicbooks.txt" 9 | }, 10 | { 11 | "query": "Tchaikovsky's Untold Story: The Mystery Behind Symphony No. 7", 12 | "response": "classicalmusic.txt" 13 | }, 14 | { 15 | "query": "Coffee Addiction: When It's More Than Just a Drink", 16 | "response": "Coffee.txt" 17 | }, 18 | { 19 | "query": "Understanding Boeing's Micro-Missile Capabilities", 20 | "response": "aviation.txt" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/SICK-R.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "The cat is lounging on the sunny windowsill.", 4 | "response": "The feline is resting on the sunny windowsill." 5 | }, 6 | { 7 | "query": "A woman is reading a book while sitting on a bench.", 8 | "response": "A lady is reading a book while seated on a bench." 9 | }, 10 | { 11 | "query": "The child is drawing with crayons on a piece of paper.", 12 | "response": "The kid is using crayons to draw on a sheet of paper." 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS12.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "A man is dancing on the ceiling.", 4 | "response": "A man is dancing on the ceiling of a room." 5 | }, 6 | { 7 | "query": "That is a shameful state of affairs when we consider that the EU itself is a champion of modernised business practice.", 8 | "response": "It is a shame when it is thought that the European Union is posed as a champion modernization of the economic life!" 9 | }, 10 | { 11 | "query": "Spain has done a magnificent job in turning round the difficult neighbourly relations which Europe and North Africa and Spain and Morocco have suffered during the course of history.", 12 | "response": "Spain has developed a remarkably positive the difficult neighbourhood which has always existed between Europe and North Africa and between Spain and Morocco." 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS13.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "the state of being exposed to danger or harm", 4 | "response": "the condition of being at risk of injury or loss." 5 | }, 6 | { 7 | "query": "a set of instructions for a computer", 8 | "response": "directions given to a computer to perform a specific task." 9 | }, 10 | { 11 | "query": "a building used for public worship", 12 | "response": "a place where people gather to worship collectively." 
13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS14.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "president obama vows to work with congress on immigration reform .", 4 | "response": "obama pledges to collaborate with congress on immigration overhaul ." 5 | }, 6 | { 7 | "query": "britain votes to leave european union .", 8 | "response": "uk votes to leave eu ." 9 | }, 10 | { 11 | "query": "russian president putin signs law banning adoption of russian children by u.s. citizens .", 12 | "response": "putin bans u.s. adoptions of russian children ." 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS15.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "The battery and bulb A are not in the same path", 4 | "response": "Bulb A and the battery are not in the same circuit." 5 | }, 6 | { 7 | "query": "Switch Y and bulb B are in the same loop", 8 | "response": "Switch Y and bulb B belong to the same circuit." 9 | }, 10 | { 11 | "query": "new york city marathon canceled due to hurricane sandy", 12 | "response": "nyc marathon canceled because of hurricane sandy" 13 | }, 14 | { 15 | "query": "pope francis calls for peace in syria during sunday address", 16 | "response": "pope francis appeals for peace in syria in his sunday speech" 17 | } 18 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS16.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "what are the symptoms of a heart attack ?", 4 | "response": "what are the signs of a heart attack ?" 5 | }, 6 | { 7 | "query": "how do i change a flat tire on my car ?", 8 | "response": "what steps should i take to replace a flat tire ?" 9 | }, 10 | { 11 | "query": "how do i cook a medium rare steak ?", 12 | "response": "what's the best way to prepare a steak to medium rare ?" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STS17.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "The sun is setting over the mountains.", 4 | "response": " \"The sun sets behind the mountains.\"" 5 | }, 6 | { 7 | "query": "A child is playing with a red ball.", 8 | "response": " \"A kid plays with a red ball.\"" 9 | }, 10 | { 11 | "query": "Two people are sitting on a bench in the park.", 12 | "response": " \"Two individuals are seated on a bench in the park.\"" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/STSBenchmark.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Agribusiness: Mad cow disease found in California", 4 | "response": "USDA Confirms Case of Mad Cow Disease in California" 5 | }, 6 | { 7 | "query": "santos stated colombian police found the evidence in 2 computers discovered with slain rebel leader raul reyes. ", 8 | "response": "francisco santos stated that colombian police found the evidence on two computers discovered with raul reyes." 
9 | }, 10 | { 11 | "query": "US Attorney General Holder resigns", 12 | "response": "US Attorney general Eric Holder to resign" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/SprintDuplicateQuestions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Kyocera duraforce pro international roaming settings", 4 | "response": "Make a call while roaming internationally - Kyocera DuraForce PRO" 5 | }, 6 | { 7 | "query": "Guide for connecting to the Sprint U301 USB mobile broadband", 8 | "response": "Turn automatic connections on or off - Sprint U301 USB Device Sprint 3G/4G Mobile Broadband" 9 | }, 10 | { 11 | "query": "What do you think is a reason that is preventing troubleshooting on my HTC One A9 related to issues to the mobile hotspots ?", 12 | "response": "Troubleshoot issues related to mobile hotspots and your HTC One A9" 13 | }, 14 | { 15 | "query": "Why has my Samsung Transform been freezing everytime I attempt to open up an app ?", 16 | "response": "Why is my Samsung Transform freezing or being unresponsive ?" 17 | }, 18 | { 19 | "query": "What can I do to turn on Wi-Fi on the HTC One A9 ?", 20 | "response": "Turn on and connect to Wi-Fi - HTC One A9" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackExchangeClustering.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Recommendations for a lightweight Markdown editor with real-time collaboration features?", 4 | "response": "softwarerecs.stackexchange.com.txt" 5 | }, 6 | { 7 | "query": "How to integrate external APIs with EOSIO blockchain applications?", 8 | "response": "eosio.stackexchange.com.txt" 9 | }, 10 | { 11 | "query": "How to balance macros for effective fat loss and muscle retention?", 12 | "response": "fitness.stackexchange.com.txt" 13 | }, 14 | { 15 | "query": "Can amans\" be used as a substantival participle in Latin?\"", 16 | "response": "latin.stackexchange.com.txt" 17 | }, 18 | { 19 | "query": "Is it normal for a 2018 Audi A4 to consume coolant frequently?", 20 | "response": "mechanics.stackexchange.com.txt" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackOverflowDupQuestions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "How to handle onChange event in React when state changes programmatically?", 4 | "response": "React onChange event not firing when state is updated programmatically" 5 | }, 6 | { 7 | "query": "How to simulate a click event on a button using JavaScript?", 8 | "response": "JavaScript button click event simulation" 9 | }, 10 | { 11 | "query": "Python: How to run a function asynchronously using asyncio?", 12 | "response": "Asyncio: Running Python function asynchronously" 13 | } 14 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/SummEval.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "passenger jin pai , 35 , was standing on the rim of a toilet when it collapsed , leaving him hospitalised with deep cuts on his leg and buttocks after he broke a toilet he was 
squatting on . passenger jin pai , 35 , was standing on the rim of a toilet when it smashed to the ground . according to airport officials he had not wanted to let his bottom touch the seat because he was ' worried it might not be clean ' .", 4 | "response": "Jin Pai was standing on rim of a toilet in Hefei Xinqiao International Airport. The porcelain toilet then tipped over and shattered on the floor. The 35-year-old is left with deep cuts to his leg and buttocks." 5 | } 6 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/TweetSentimentExtractionClassification.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "I`d have responded, if I were going", 4 | "response": "neutral" 5 | }, 6 | { 7 | "query": "what interview! leave me alone", 8 | "response": "negative" 9 | }, 10 | { 11 | "query": "2am feedings for the baby are fun when he is all smiles and coos", 12 | "response": "positive" 13 | }, 14 | { 15 | "query": "is cleaning the house for her family who is comming later today..", 16 | "response": "neutral" 17 | }, 18 | { 19 | "query": "Sick. With a flu like thing.", 20 | "response": "negative" 21 | }, 22 | { 23 | "query": "We saw that in none 3D - the baddie`s the best", 24 | "response": "positive" 25 | } 26 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwentyNewsgroupsClustering.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Major flaw discovered in widely-used encryption protocol", 4 | "response": "sci.crypt" 5 | }, 6 | { 7 | "query": "Bruins' Unstoppable Winning Streak", 8 | "response": "rec.sport.hockey" 9 | }, 10 | { 11 | "query": "Comparing Windows File Systems: NTFS vs. FAT32 vs. exFAT", 12 | "response": "comp.os.ms-windows.misc" 13 | }, 14 | { 15 | "query": "Troubleshooting a Digital Multimeter Calibration Issue", 16 | "response": "sci.electronics" 17 | }, 18 | { 19 | "query": "Understanding DPI Scaling in X Window Systems", 20 | "response": "comp.windows.x" 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwitterSemEval2015.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Excited for the new Game of Thrones episode tonight!", 4 | "response": "Can't wait for tonight's Game of Thrones episode!" 5 | }, 6 | { 7 | "query": "Just finished a 5k run and feel amazing!", 8 | "response": "Completed a 5k run and I'm feeling great!" 9 | }, 10 | { 11 | "query": "Had an incredible dinner at Joe's Italian Restaurant.", 12 | "response": "Joe's Italian Restaurant served an amazing dinner tonight." 13 | }, 14 | { 15 | "query": "I need a vacation. Can't wait to hit the beach.", 16 | "response": "Desperately need a holiday. Looking forward to beach time." 17 | }, 18 | { 19 | "query": "The new iPhone has some fantastic features!", 20 | "response": "Loving the features on the new iPhone!" 
21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwitterURLCorpus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "query": "Elon Musk says Tesla will be profitable next quarter.", 4 | "response": "Elon Musk claims Tesla will turn a profit next quarter." 5 | }, 6 | { 7 | "query": "The new iPhone just got announced and it's amazing.", 8 | "response": "Apple just unveiled the new iPhone and it's incredible." 9 | }, 10 | { 11 | "query": "Beyonc\u00e9's new album has topped the charts in its first week.", 12 | "response": "Beyonc\u00e9's latest album debuted at number one on the charts." 13 | }, 14 | { 15 | "query": "Breaking: Major earthquake hits California.", 16 | "response": "Just in: Large earthquake strikes California." 17 | }, 18 | { 19 | "query": "NASA plans to send humans to Mars by 2030.", 20 | "response": "NASA aims to have astronauts on Mars by the year 2030." 21 | } 22 | ] -------------------------------------------------------------------------------- /research/llm_embedder/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_embedder/evaluation/__init__.py -------------------------------------------------------------------------------- /research/llm_embedder/imgs/llm-embedder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_embedder/imgs/llm-embedder.png -------------------------------------------------------------------------------- /research/llm_embedder/src/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logging.basicConfig( 3 | level=logging.INFO, 4 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 5 | datefmt="%m/%d/%Y %H:%M:%S", 6 | ) 7 | 8 | # import transformers 9 | # transformers.logging.set_verbosity_error() 10 | -------------------------------------------------------------------------------- /research/llm_embedder/src/lm/__init__.py: -------------------------------------------------------------------------------- 1 | from .args import LMArgs, SRLMArgs, GenerationArgs 2 | from .modeling_lm import LM 3 | from .modeling_srlm import SelfRetrievalLM 4 | -------------------------------------------------------------------------------- /research/llm_embedder/src/retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | from .args import RetrievalArgs, RankerArgs 2 | from .modeling_dense import DenseRetriever 3 | from .modeling_bm25 import BM25Retriever, NaiveBM25Retriever 4 | from .modeling_unified import Retriever 5 | from .modeling_ranker import CrossEncoder 6 | from .metrics import RetrievalMetric 7 | from .data import RetrievalDataset, RetrievalDataCollator, TASK_CONFIG 8 | -------------------------------------------------------------------------------- /research/llm_embedder/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import FileLogger, Sequential_Sampler, DatasetProcessFn, DefaultDataCollator, makedirs, split_file_dir_name_ext, clear_dir, get_max_length_in_nested_lists, pad_nested_lists, mask_nested_lists, 
are_elements_of_same_length, normalize_text, load_json, save_json, load_pickle, save_pickle, add_eos, remove_eos -------------------------------------------------------------------------------- /research/llm_reranker/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /research/llm_reranker/evaluation/BEIR-bge-en-v1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_reranker/evaluation/BEIR-bge-en-v1.5.png -------------------------------------------------------------------------------- /research/llm_reranker/evaluation/BEIR-e5-mistral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_reranker/evaluation/BEIR-e5-mistral.png -------------------------------------------------------------------------------- /research/llm_reranker/evaluation/CMTEB-retrieval-bge-zh-v1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_reranker/evaluation/CMTEB-retrieval-bge-zh-v1.5.png -------------------------------------------------------------------------------- /research/llm_reranker/evaluation/llama-index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_reranker/evaluation/llama-index.png -------------------------------------------------------------------------------- /research/llm_reranker/evaluation/miracl-bge-m3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/llm_reranker/evaluation/miracl-bge-m3.png -------------------------------------------------------------------------------- /research/llm_reranker/finetune_for_instruction/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /research/llm_reranker/finetune_for_layerwise/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /research/llm_reranker/merge/__init__.py: -------------------------------------------------------------------------------- 1 | from .merge_base_model import merge_llm 2 | from .merge_layerwise_model_from_raw_model import merge_layerwise_raw_llm 3 | from .merge_layerwise_model_from_finetuned_model import merge_layerwise_finetuned_llm -------------------------------------------------------------------------------- /research/old-examples/finetune/ds_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 12, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "bf16": { 12 | "enabled": "auto" 13 | }, 14 | 15 | "optimizer": { 16 | "type": "AdamW", 17 | "params": { 18 
| "lr": "auto", 19 | "betas": "auto", 20 | "eps": "auto", 21 | "weight_decay": "auto" 22 | } 23 | }, 24 | 25 | "scheduler": { 26 | "type": "WarmupDecayLR", 27 | "params": { 28 | "warmup_min_lr": "auto", 29 | "warmup_max_lr": "auto", 30 | "warmup_num_steps": "auto", 31 | "total_num_steps": "auto" 32 | } 33 | }, 34 | 35 | "zero_optimization": { 36 | "stage": 0 37 | }, 38 | 39 | "gradient_accumulation_steps": "auto", 40 | "gradient_clipping": "auto", 41 | "steps_per_print": 100, 42 | "train_batch_size": "auto", 43 | "train_micro_batch_size_per_gpu": "auto", 44 | "wall_clock_breakdown": false 45 | } 46 | -------------------------------------------------------------------------------- /research/old-examples/finetune/toy_evaluation_data/toy_corpus.json: -------------------------------------------------------------------------------- 1 | {"content": "A is ..."} 2 | {"content": "B is ..."} 3 | {"content": "C is ..."} 4 | {"content": "Panda is ..."} 5 | {"content": "... is A"} -------------------------------------------------------------------------------- /research/old-examples/finetune/toy_evaluation_data/toy_query.json: -------------------------------------------------------------------------------- 1 | {"query": "What is A?", "positive": ["A is ...", "... is A"]} 2 | {"query": "What is B?", "positive": ["B is ..."]} 3 | {"query": "What is C?", "positive": ["C is ..."]} -------------------------------------------------------------------------------- /research/old-examples/pretrain/retromae_pretrain/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /research/old-examples/search_demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/old-examples/search_demo/__init__.py -------------------------------------------------------------------------------- /research/old-examples/search_demo/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | 4 | @dataclass 5 | class ModelArguments: 6 | model_name_or_path: str = field( 7 | default='BAAI/bge-large-zh-noinstruct', 8 | metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} 9 | ) 10 | 11 | 12 | @dataclass 13 | class DataArguments: 14 | data_path: str = field( 15 | default='./data', metadata={"help": "Path to wikipedia-22-12"} 16 | ) 17 | -------------------------------------------------------------------------------- /research/old-examples/search_demo/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==2.14.0 2 | faiss-gpu==1.7.2 3 | langchain==0.0.244 4 | numpy==1.23.3 5 | pyserini==0.21.0 6 | tiktoken==0.4.0 7 | torch==2.0.1 8 | torch_geometric==2.3.1 9 | tqdm==4.65.0 10 | transformers==4.30.2 11 | openai==0.27.4 12 | urllib3==1.25.11 -------------------------------------------------------------------------------- /research/old-examples/search_demo/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/old-examples/search_demo/run.py -------------------------------------------------------------------------------- 
/research/old-examples/search_demo/tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/old-examples/search_demo/tool.py -------------------------------------------------------------------------------- /research/reranker/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /research/visual_bge/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import Visualized_BGE -------------------------------------------------------------------------------- /research/visual_bge/imgs/SFT-CIRR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/SFT-CIRR.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/SFT-ReMuQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/SFT-ReMuQ.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/SFT-WebQA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/SFT-WebQA.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/cir_candi_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/cir_candi_1.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/cir_candi_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/cir_candi_2.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/cir_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/cir_query.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/wiki_candi_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/wiki_candi_1.jpg -------------------------------------------------------------------------------- /research/visual_bge/imgs/wiki_candi_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/wiki_candi_2.jpg -------------------------------------------------------------------------------- 
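The visual_bge package re-exports Visualized_BGE from visual_bge.modeling (see its __init__.py earlier in this listing). A rough usage sketch follows; the constructor arguments, the weight-file name, and the encode() signature are assumptions drawn from the project's documentation rather than from this listing, so verify them against the visual_bge README before relying on them.

import torch
from visual_bge.modeling import Visualized_BGE  # re-exported by visual_bge/__init__.py above

# All names below are illustrative: the backbone id and local weight path are placeholders.
model = Visualized_BGE(
    model_name_bge="BAAI/bge-base-en-v1.5",
    model_weight="Visualized_base_en_v1.5.pth",
)
model.eval()

with torch.no_grad():
    # Composed image+text query versus an image-only candidate (composed image retrieval).
    query_emb = model.encode(image="imgs/cir_query.png", text="make the background a dark sky")
    cand_emb = model.encode(image="imgs/cir_candi_1.png")

print(query_emb @ cand_emb.T)  # similarity score; embeddings are assumed to be normalized
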
/research/visual_bge/imgs/zs-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/zs-benchmark.png -------------------------------------------------------------------------------- /research/visual_bge/imgs/zs-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/imgs/zs-performance.png -------------------------------------------------------------------------------- /research/visual_bge/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="visual_bge", 5 | version="0.1.0", 6 | description='visual_bge', 7 | long_description="./README.md", 8 | long_description_content_type="text/markdown", 9 | url='https://github.com/FlagOpen/FlagEmbedding/tree/master/research/visual_bge', 10 | packages=find_packages(), 11 | install_requires=[ 12 | 'torchvision', 13 | 'timm', 14 | 'einops', 15 | 'ftfy' 16 | ], 17 | python_requires='>=3.6', 18 | ) 19 | -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD 2 | from .factory import create_model, create_model_and_transforms, create_model_from_pretrained, get_tokenizer, create_eva_vision_and_transforms 3 | from .factory import list_models, add_model_config, get_model_config, load_checkpoint 4 | from .loss import ClipLoss 5 | from .model import CLIP, CustomCLIP, CLIPTextCfg, CLIPVisionCfg,\ 6 | convert_weights_to_lp, convert_weights_to_fp16, trace_model, get_cast_dtype 7 | from .openai import load_openai_model, list_openai_models 8 | from .pretrained import list_pretrained, list_pretrained_models_by_tag, list_pretrained_tags_by_model,\ 9 | get_pretrained_url, download_pretrained_from_url, is_pretrained_cfg, get_pretrained_cfg, download_pretrained 10 | from .tokenizer import SimpleTokenizer, tokenize 11 | from .transform import image_transform -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FlagOpen/FlagEmbedding/5e64baa61e75df23105a66d1e9d09ad799366e2a/research/visual_bge/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/constants.py: -------------------------------------------------------------------------------- 1 | OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) 2 | OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) 3 | -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-B-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "patch_size": 16, 8 | "eva_model_name": "eva-clip-b-16", 9 | "ls_init_value": 0.1, 10 | 
"drop_path_rate": 0.0 11 | }, 12 | "text_cfg": { 13 | "context_length": 77, 14 | "vocab_size": 49408, 15 | "width": 512, 16 | "heads": 8, 17 | "layers": 12 18 | } 19 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 40, 6 | "width": 1408, 7 | "head_width": 88, 8 | "mlp_ratio": 4.3637, 9 | "patch_size": 14, 10 | "eva_model_name": "eva-clip-g-14-x", 11 | "drop_path_rate": 0, 12 | "xattn": true, 13 | "fusedLN": true 14 | }, 15 | "text_cfg": { 16 | "context_length": 77, 17 | "vocab_size": 49408, 18 | "width": 1024, 19 | "heads": 16, 20 | "layers": 24, 21 | "xattn": false, 22 | "fusedLN": true 23 | } 24 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 40, 6 | "width": 1408, 7 | "head_width": 88, 8 | "mlp_ratio": 4.3637, 9 | "patch_size": 14, 10 | "eva_model_name": "eva-clip-g-14-x", 11 | "drop_path_rate": 0.4, 12 | "xattn": true, 13 | "fusedLN": true 14 | }, 15 | "text_cfg": { 16 | "context_length": 77, 17 | "vocab_size": 49408, 18 | "width": 768, 19 | "heads": 12, 20 | "layers": 12, 21 | "xattn": false, 22 | "fusedLN": true 23 | } 24 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-B-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "head_width": 64, 8 | "patch_size": 16, 9 | "mlp_ratio": 2.6667, 10 | "eva_model_name": "eva-clip-b-16-X", 11 | "drop_path_rate": 0.0, 12 | "xattn": true, 13 | "fusedLN": true, 14 | "rope": true, 15 | "pt_hw_seq_len": 16, 16 | "intp_freq": true, 17 | "naiveswiglu": true, 18 | "subln": true, 19 | "patch_dropout": 0.5 20 | }, 21 | "text_cfg": { 22 | "context_length": 77, 23 | "vocab_size": 49408, 24 | "width": 512, 25 | "heads": 8, 26 | "layers": 12, 27 | "xattn": true, 28 | "fusedLN": true 29 | } 30 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14-336.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 336, 5 | "layers": 24, 6 | "width": 1024, 7 | "drop_path_rate": 0, 8 | "head_width": 64, 9 | "mlp_ratio": 2.6667, 10 | "patch_size": 14, 11 | "eva_model_name": "eva-clip-l-14-336", 12 | "xattn": true, 13 | "fusedLN": true, 14 | "rope": true, 15 | "pt_hw_seq_len": 16, 16 | "intp_freq": true, 17 | "naiveswiglu": true, 18 | "subln": true 19 | }, 20 | "text_cfg": { 21 | "context_length": 77, 22 | "vocab_size": 49408, 23 | "width": 768, 24 | "heads": 12, 25 | "layers": 12, 26 | "xattn": false, 27 | "fusedLN": true 28 | } 29 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14.json: -------------------------------------------------------------------------------- 1 | { 
2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 24, 6 | "width": 1024, 7 | "drop_path_rate": 0, 8 | "head_width": 64, 9 | "mlp_ratio": 2.6667, 10 | "patch_size": 14, 11 | "eva_model_name": "eva-clip-l-14", 12 | "xattn": true, 13 | "fusedLN": true, 14 | "rope": true, 15 | "pt_hw_seq_len": 16, 16 | "intp_freq": true, 17 | "naiveswiglu": true, 18 | "subln": true 19 | }, 20 | "text_cfg": { 21 | "context_length": 77, 22 | "vocab_size": 49408, 23 | "width": 768, 24 | "heads": 12, 25 | "layers": 12, 26 | "xattn": false, 27 | "fusedLN": true 28 | } 29 | } -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14-plus.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 64, 6 | "width": 1792, 7 | "head_width": 112, 8 | "mlp_ratio": 8.571428571428571, 9 | "patch_size": 14, 10 | "eva_model_name": "eva-clip-4b-14-x", 11 | "drop_path_rate": 0, 12 | "xattn": true, 13 | "postnorm": true, 14 | "fusedLN": true 15 | }, 16 | "text_cfg": { 17 | "context_length": 77, 18 | "vocab_size": 49408, 19 | "width": 1280, 20 | "heads": 20, 21 | "layers": 32, 22 | "xattn": false, 23 | "fusedLN": true 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 64, 6 | "width": 1792, 7 | "head_width": 112, 8 | "mlp_ratio": 8.571428571428571, 9 | "patch_size": 14, 10 | "eva_model_name": "eva-clip-4b-14-x", 11 | "drop_path_rate": 0, 12 | "xattn": true, 13 | "postnorm": true, 14 | "fusedLN": true 15 | }, 16 | "text_cfg": { 17 | "context_length": 77, 18 | "vocab_size": 49408, 19 | "width": 1024, 20 | "heads": 16, 21 | "layers": 24, 22 | "xattn": false, 23 | "fusedLN": true 24 | } 25 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", mode="r", encoding="utf-8") as readme_file: 4 | readme = readme_file.read() 5 | 6 | setup( 7 | name='FlagEmbedding', 8 | version='1.3.5', 9 | description='FlagEmbedding', 10 | long_description=readme, 11 | long_description_content_type="text/markdown", 12 | author_email='2906698981@qq.com', 13 | url='https://github.com/FlagOpen/FlagEmbedding', 14 | packages=find_packages(), 15 | include_package_data=True, 16 | install_requires=[ 17 | 'torch>=1.6.0', 18 | 'transformers>=4.44.2', 19 | 'datasets>=2.19.0', 20 | 'accelerate>=0.20.1', 21 | 'sentence_transformers', 22 | 'peft', 23 | 'ir-datasets', 24 | 'sentencepiece', 25 | 'protobuf' 26 | ], 27 | extras_require={ 28 | 'finetune': ['deepspeed', 'flash-attn'], 29 | }, 30 | ) 31 | --------------------------------------------------------------------------------