├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── evaluation ├── README.md ├── bright │ ├── configs │ │ ├── bm25 │ │ │ ├── aops.json │ │ │ ├── biology.json │ │ │ ├── earth_science.json │ │ │ ├── economics.json │ │ │ ├── leetcode.json │ │ │ ├── pony.json │ │ │ ├── psychology.json │ │ │ ├── robotics.json │ │ │ ├── stackoverflow.json │ │ │ ├── sustainable_living.json │ │ │ ├── theoremqa.json │ │ │ ├── theoremqa_questions.json │ │ │ └── theoremqa_theorems.json │ │ └── reasonir │ │ │ ├── aops.json │ │ │ ├── biology.json │ │ │ ├── earth_science.json │ │ │ ├── economics.json │ │ │ ├── leetcode.json │ │ │ ├── pony.json │ │ │ ├── psychology.json │ │ │ ├── robotics.json │ │ │ ├── stackoverflow.json │ │ │ ├── sustainable_living.json │ │ │ ├── theoremqa.json │ │ │ ├── theoremqa_questions.json │ │ │ └── theoremqa_theorems.json │ ├── other_requirements.txt │ ├── prompts.py │ ├── requirements.txt │ ├── reranker.py │ ├── reranker_script.sh │ ├── retrievers.py │ ├── run.py │ └── script.sh └── rag │ ├── datastore │ ├── build_datastore.sh │ └── construct_datastore_corpus.py │ ├── gpqa │ ├── apis │ │ ├── base.py │ │ └── offline_massiveds_search_api.py │ ├── scripts │ │ └── evaluate_naive_rag.sh │ └── src │ │ ├── conf │ │ └── naive_rag_default.yaml │ │ ├── data │ │ └── datasets.py │ │ ├── eval │ │ └── evaluate.py │ │ ├── main.py │ │ ├── prompts │ │ └── task_instructions.py │ │ ├── utils │ │ ├── cache_utils.py │ │ ├── hydra_runner.py │ │ └── math_equivalence.py │ │ └── workflow │ │ └── naive_rag.py │ └── mmlu_cot │ ├── compute_accuracy.py │ ├── cot_prompt_lib │ └── initial_prompt.txt │ ├── evaluate_from_local_mmlu.py │ ├── extract_mmlu_group.py │ ├── scripts │ └── eval_llama_3_8b_mmlu_rag.sh │ └── utils │ └── extract_cot_as_queries.py ├── synthetic_data_generation ├── README.md ├── batch_api_helper.py ├── data_gen_prompts.py ├── doc_to_query.py ├── doc_to_query_batch.py ├── document_filters │ ├── __init__.py │ ├── basic_filters.py │ └── fineweb_edu_filter.py ├── gen_utils.py ├── generate_reasoning.py ├── generate_reasoning_batch.py ├── hard_negative_mining.py ├── lm_helper.py ├── requirements.txt ├── script.sh ├── script_batch.sh ├── setup_java.sh └── supplement_negative_passage.py ├── test_time_techniques ├── README.md └── query_rewriting.py └── training ├── README.md ├── config_128gpusfsdp_llama.yml ├── modeling_llama.py └── train.sh /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/README.md -------------------------------------------------------------------------------- /evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/README.md -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/aops.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/aops.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/biology.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/biology.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/earth_science.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/earth_science.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/economics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/economics.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/leetcode.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/leetcode.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/pony.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/pony.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/psychology.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/psychology.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/robotics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/robotics.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/stackoverflow.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/stackoverflow.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/sustainable_living.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/sustainable_living.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/theoremqa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/theoremqa.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/theoremqa_questions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/theoremqa_questions.json -------------------------------------------------------------------------------- /evaluation/bright/configs/bm25/theoremqa_theorems.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/bm25/theoremqa_theorems.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/aops.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/aops.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/biology.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/biology.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/earth_science.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/earth_science.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/economics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/economics.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/leetcode.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/leetcode.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/pony.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/pony.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/psychology.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/psychology.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/robotics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/robotics.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/stackoverflow.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/stackoverflow.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/sustainable_living.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/sustainable_living.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/theoremqa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/theoremqa.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/theoremqa_questions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/theoremqa_questions.json -------------------------------------------------------------------------------- /evaluation/bright/configs/reasonir/theoremqa_theorems.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/configs/reasonir/theoremqa_theorems.json -------------------------------------------------------------------------------- /evaluation/bright/other_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/other_requirements.txt -------------------------------------------------------------------------------- /evaluation/bright/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/prompts.py -------------------------------------------------------------------------------- /evaluation/bright/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/requirements.txt -------------------------------------------------------------------------------- /evaluation/bright/reranker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/reranker.py -------------------------------------------------------------------------------- /evaluation/bright/reranker_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/reranker_script.sh -------------------------------------------------------------------------------- /evaluation/bright/retrievers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/retrievers.py -------------------------------------------------------------------------------- /evaluation/bright/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/run.py -------------------------------------------------------------------------------- /evaluation/bright/script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/bright/script.sh -------------------------------------------------------------------------------- /evaluation/rag/datastore/build_datastore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/datastore/build_datastore.sh -------------------------------------------------------------------------------- /evaluation/rag/datastore/construct_datastore_corpus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/datastore/construct_datastore_corpus.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/apis/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/apis/base.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/apis/offline_massiveds_search_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/apis/offline_massiveds_search_api.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/scripts/evaluate_naive_rag.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/scripts/evaluate_naive_rag.sh -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/conf/naive_rag_default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/conf/naive_rag_default.yaml -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/data/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/data/datasets.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/eval/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/eval/evaluate.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/main.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/prompts/task_instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/prompts/task_instructions.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/utils/cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/utils/cache_utils.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/utils/hydra_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/utils/hydra_runner.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/utils/math_equivalence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/utils/math_equivalence.py -------------------------------------------------------------------------------- /evaluation/rag/gpqa/src/workflow/naive_rag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/gpqa/src/workflow/naive_rag.py -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/compute_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/compute_accuracy.py -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/cot_prompt_lib/initial_prompt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/cot_prompt_lib/initial_prompt.txt -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/evaluate_from_local_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/evaluate_from_local_mmlu.py -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/extract_mmlu_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/extract_mmlu_group.py -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/scripts/eval_llama_3_8b_mmlu_rag.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/scripts/eval_llama_3_8b_mmlu_rag.sh -------------------------------------------------------------------------------- /evaluation/rag/mmlu_cot/utils/extract_cot_as_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/evaluation/rag/mmlu_cot/utils/extract_cot_as_queries.py -------------------------------------------------------------------------------- /synthetic_data_generation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/README.md -------------------------------------------------------------------------------- /synthetic_data_generation/batch_api_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/batch_api_helper.py -------------------------------------------------------------------------------- /synthetic_data_generation/data_gen_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/data_gen_prompts.py -------------------------------------------------------------------------------- /synthetic_data_generation/doc_to_query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/doc_to_query.py -------------------------------------------------------------------------------- /synthetic_data_generation/doc_to_query_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/doc_to_query_batch.py -------------------------------------------------------------------------------- /synthetic_data_generation/document_filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/document_filters/__init__.py -------------------------------------------------------------------------------- /synthetic_data_generation/document_filters/basic_filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/document_filters/basic_filters.py -------------------------------------------------------------------------------- /synthetic_data_generation/document_filters/fineweb_edu_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/document_filters/fineweb_edu_filter.py -------------------------------------------------------------------------------- /synthetic_data_generation/gen_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/gen_utils.py -------------------------------------------------------------------------------- /synthetic_data_generation/generate_reasoning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/generate_reasoning.py -------------------------------------------------------------------------------- /synthetic_data_generation/generate_reasoning_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/generate_reasoning_batch.py -------------------------------------------------------------------------------- /synthetic_data_generation/hard_negative_mining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/hard_negative_mining.py -------------------------------------------------------------------------------- /synthetic_data_generation/lm_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/lm_helper.py -------------------------------------------------------------------------------- /synthetic_data_generation/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/requirements.txt -------------------------------------------------------------------------------- /synthetic_data_generation/script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/script.sh -------------------------------------------------------------------------------- /synthetic_data_generation/script_batch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/script_batch.sh -------------------------------------------------------------------------------- /synthetic_data_generation/setup_java.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/setup_java.sh -------------------------------------------------------------------------------- /synthetic_data_generation/supplement_negative_passage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/synthetic_data_generation/supplement_negative_passage.py -------------------------------------------------------------------------------- /test_time_techniques/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/test_time_techniques/README.md -------------------------------------------------------------------------------- /test_time_techniques/query_rewriting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/test_time_techniques/query_rewriting.py -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/training/README.md -------------------------------------------------------------------------------- /training/config_128gpusfsdp_llama.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/training/config_128gpusfsdp_llama.yml -------------------------------------------------------------------------------- /training/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/training/modeling_llama.py -------------------------------------------------------------------------------- /training/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/ReasonIR/HEAD/training/train.sh --------------------------------------------------------------------------------