├── .gitignore ├── LICENSE ├── README.md ├── data ├── build_data_psg.py ├── construct_psg_data.sh ├── get_embedding.sh ├── openmatch │ ├── __init__.py │ ├── arguments.py │ ├── data_augmentation_strategy.py │ ├── dataset │ │ ├── __init__.py │ │ ├── beir_dataset.py │ │ ├── data_collator.py │ │ ├── inference_dataset.py │ │ └── train_dataset.py │ ├── driver │ │ ├── async_eval_dr.py │ │ ├── beir_eval_pipeline.py │ │ ├── build_index.py │ │ ├── generate_queries.py │ │ ├── generate_queries_contrastive.py │ │ ├── pretrain_dr.py │ │ ├── rerank.py │ │ ├── retrieve.py │ │ ├── successive_retrieve.py │ │ ├── train_dr.py │ │ ├── train_dr_distillation.py │ │ ├── train_qg.py │ │ └── train_rr.py │ ├── loss.py │ ├── modeling │ │ ├── __init__.py │ │ ├── dense_retrieval_model.py │ │ ├── linear.py │ │ └── reranking_model.py │ ├── qa_utils.py │ ├── retriever │ │ ├── __init__.py │ │ ├── contrastive_query_generator.py │ │ ├── dense_retriever.py │ │ └── reranker.py │ ├── test_retriever.py │ ├── trainer │ │ ├── __init__.py │ │ ├── dense_trainer.py │ │ └── reranker_trainer.py │ └── utils.py ├── retrieve.sh └── split.py ├── scripts ├── config │ └── ds_config_zero2.json ├── gen_forward.sh ├── gen_inference.sh ├── get_gen_data.sh ├── get_kr_data.sh ├── kr_gen_forward.sh ├── kr_inference.sh ├── merge_lora.sh ├── train_gen.sh └── train_kr.sh └── src ├── evaluate ├── eval.py └── eval_utils.py ├── generator ├── __pycache__ │ └── template.cpython-310.pyc ├── gen_dpo_data.py ├── gen_llm_response.py ├── merge_response_file.py ├── template.py └── train.py ├── knowledgeRefinement ├── __pycache__ │ └── template.cpython-310.pyc ├── gen_dpo_data.py ├── gen_llm_response.py ├── kr_inference.py ├── merge_response_file.py ├── template.py └── train.py └── merge_checkpoint.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/README.md -------------------------------------------------------------------------------- /data/build_data_psg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/build_data_psg.py -------------------------------------------------------------------------------- /data/construct_psg_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/construct_psg_data.sh -------------------------------------------------------------------------------- /data/get_embedding.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/get_embedding.sh -------------------------------------------------------------------------------- /data/openmatch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/openmatch/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/arguments.py -------------------------------------------------------------------------------- /data/openmatch/data_augmentation_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/data_augmentation_strategy.py -------------------------------------------------------------------------------- /data/openmatch/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/dataset/__init__.py -------------------------------------------------------------------------------- /data/openmatch/dataset/beir_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/dataset/beir_dataset.py -------------------------------------------------------------------------------- /data/openmatch/dataset/data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/dataset/data_collator.py -------------------------------------------------------------------------------- /data/openmatch/dataset/inference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/dataset/inference_dataset.py -------------------------------------------------------------------------------- /data/openmatch/dataset/train_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/dataset/train_dataset.py -------------------------------------------------------------------------------- /data/openmatch/driver/async_eval_dr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/async_eval_dr.py -------------------------------------------------------------------------------- /data/openmatch/driver/beir_eval_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/beir_eval_pipeline.py -------------------------------------------------------------------------------- /data/openmatch/driver/build_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/build_index.py -------------------------------------------------------------------------------- /data/openmatch/driver/generate_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/generate_queries.py -------------------------------------------------------------------------------- /data/openmatch/driver/generate_queries_contrastive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/generate_queries_contrastive.py -------------------------------------------------------------------------------- /data/openmatch/driver/pretrain_dr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/pretrain_dr.py -------------------------------------------------------------------------------- /data/openmatch/driver/rerank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/rerank.py -------------------------------------------------------------------------------- /data/openmatch/driver/retrieve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/retrieve.py -------------------------------------------------------------------------------- /data/openmatch/driver/successive_retrieve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/successive_retrieve.py -------------------------------------------------------------------------------- /data/openmatch/driver/train_dr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/train_dr.py -------------------------------------------------------------------------------- /data/openmatch/driver/train_dr_distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/train_dr_distillation.py -------------------------------------------------------------------------------- /data/openmatch/driver/train_qg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/train_qg.py -------------------------------------------------------------------------------- /data/openmatch/driver/train_rr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/driver/train_rr.py -------------------------------------------------------------------------------- /data/openmatch/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/loss.py -------------------------------------------------------------------------------- /data/openmatch/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/modeling/__init__.py -------------------------------------------------------------------------------- /data/openmatch/modeling/dense_retrieval_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/modeling/dense_retrieval_model.py -------------------------------------------------------------------------------- /data/openmatch/modeling/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/modeling/linear.py -------------------------------------------------------------------------------- /data/openmatch/modeling/reranking_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/modeling/reranking_model.py -------------------------------------------------------------------------------- /data/openmatch/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/qa_utils.py -------------------------------------------------------------------------------- /data/openmatch/retriever/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/retriever/__init__.py -------------------------------------------------------------------------------- /data/openmatch/retriever/contrastive_query_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/retriever/contrastive_query_generator.py -------------------------------------------------------------------------------- /data/openmatch/retriever/dense_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/retriever/dense_retriever.py -------------------------------------------------------------------------------- /data/openmatch/retriever/reranker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/retriever/reranker.py -------------------------------------------------------------------------------- /data/openmatch/test_retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/test_retriever.py -------------------------------------------------------------------------------- /data/openmatch/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/trainer/__init__.py -------------------------------------------------------------------------------- /data/openmatch/trainer/dense_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/trainer/dense_trainer.py -------------------------------------------------------------------------------- /data/openmatch/trainer/reranker_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/trainer/reranker_trainer.py -------------------------------------------------------------------------------- /data/openmatch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/openmatch/utils.py -------------------------------------------------------------------------------- /data/retrieve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/retrieve.sh -------------------------------------------------------------------------------- /data/split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/data/split.py -------------------------------------------------------------------------------- /scripts/config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/config/ds_config_zero2.json -------------------------------------------------------------------------------- /scripts/gen_forward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/gen_forward.sh -------------------------------------------------------------------------------- /scripts/gen_inference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/gen_inference.sh -------------------------------------------------------------------------------- /scripts/get_gen_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/get_gen_data.sh -------------------------------------------------------------------------------- /scripts/get_kr_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/get_kr_data.sh -------------------------------------------------------------------------------- /scripts/kr_gen_forward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/kr_gen_forward.sh -------------------------------------------------------------------------------- /scripts/kr_inference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/kr_inference.sh -------------------------------------------------------------------------------- /scripts/merge_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/merge_lora.sh -------------------------------------------------------------------------------- /scripts/train_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/train_gen.sh -------------------------------------------------------------------------------- /scripts/train_kr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/scripts/train_kr.sh -------------------------------------------------------------------------------- /src/evaluate/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/evaluate/eval.py -------------------------------------------------------------------------------- /src/evaluate/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/evaluate/eval_utils.py -------------------------------------------------------------------------------- /src/generator/__pycache__/template.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/__pycache__/template.cpython-310.pyc -------------------------------------------------------------------------------- /src/generator/gen_dpo_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/gen_dpo_data.py -------------------------------------------------------------------------------- /src/generator/gen_llm_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/gen_llm_response.py -------------------------------------------------------------------------------- /src/generator/merge_response_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/merge_response_file.py -------------------------------------------------------------------------------- /src/generator/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/template.py -------------------------------------------------------------------------------- /src/generator/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/generator/train.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/__pycache__/template.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/__pycache__/template.cpython-310.pyc -------------------------------------------------------------------------------- /src/knowledgeRefinement/gen_dpo_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/gen_dpo_data.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/gen_llm_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/gen_llm_response.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/kr_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/kr_inference.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/merge_response_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/merge_response_file.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/template.py -------------------------------------------------------------------------------- /src/knowledgeRefinement/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/knowledgeRefinement/train.py -------------------------------------------------------------------------------- /src/merge_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMatch/RAG-DDR/HEAD/src/merge_checkpoint.py --------------------------------------------------------------------------------