├── LICENSE ├── OpenRLHF-RAG ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── README_zh.md ├── dockerfile │ ├── Dockerfile │ └── docker-entrypoint.sh ├── docs │ ├── logo.png │ ├── ppo_examples.md │ └── ray_architecture.png ├── dpsk1_5b-1node-grpo-jjh-log ├── dpsk1_5b-1node-grpo-jjh-log.txt ├── dpsk1_5b-1node-grpo-jjh_1.sh ├── dpsk1_5b-1node-grpo-jjh_2.sh ├── dpsk1_5b-1node-grpo.sh ├── examples │ └── scripts │ │ ├── docker_run.sh │ │ ├── nvidia_docker_install.sh │ │ ├── serve_remote_rm.sh │ │ ├── train_conditional_llama.sh │ │ ├── train_continue_pretrain_llama.sh │ │ ├── train_dpo_llama.sh │ │ ├── train_dpo_llama_34b.sh │ │ ├── train_dpo_ring_llama.sh │ │ ├── train_iterative_dpo_llama.sh │ │ ├── train_knowledge_distillation.sh │ │ ├── train_kto_llama.sh │ │ ├── train_llama_slurm.sh │ │ ├── train_ppo_llama.sh │ │ ├── train_ppo_llama_ray.sh │ │ ├── train_ppo_llama_ray_70b.sh │ │ ├── train_ppo_llama_ray_slurm.sh │ │ ├── train_ppo_llama_with_remote_rm.sh │ │ ├── train_prm_mistral.sh │ │ ├── train_reinforce_llama_ray.sh │ │ ├── train_rejection_sampling_llama.sh │ │ ├── train_rm_llama.sh │ │ ├── train_sft_llama.sh │ │ └── train_sft_mixtral_lora.sh ├── log │ └── server │ │ └── final-dpsk1_5b-rm1-1-2-grpo-len_29000tbs_512-rbs_128-sample_8-kl_0.001-warmup_0.0-ep_10000-plr_2e-6-temp1.0-30k-node0.log ├── openrlhf.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt ├── openrlhf │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── cli │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── server_dpsk_tuple.cpython-310.pyc │ │ │ ├── train_ppo.cpython-310.pyc │ │ │ └── train_ppo_ray.cpython-310.pyc │ │ ├── batch_inference.py │ │ ├── interactive_chat.py │ │ ├── serve_rm.py │ │ ├── server_dpsk_tuple.py │ │ ├── server_dpsk_tuple_check.py │ │ ├── server_rm_rag.py │ │ ├── train_dpo.py │ │ ├── train_kd.py │ │ ├── train_kto.py │ │ ├── train_ppo.py │ │ ├── train_ppo_ray.py │ │ ├── train_prm.py │ │ ├── train_rm.py │ │ └── train_sft.py │ ├── datasets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── process_reward_dataset.cpython-310.pyc │ │ │ ├── prompts_dataset.cpython-310.pyc │ │ │ ├── reward_dataset.cpython-310.pyc │ │ │ ├── sft_dataset.cpython-310.pyc │ │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── process_reward_dataset.py │ │ ├── prompts_dataset.py │ │ ├── reward_dataset.py │ │ ├── sft_dataset.py │ │ ├── unpaired_preference_dataset.py │ │ └── utils.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── actor.cpython-310.pyc │ │ │ ├── loss.cpython-310.pyc │ │ │ ├── model.cpython-310.pyc │ │ │ ├── ring_attn_utils.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── actor.py │ │ ├── loss.py │ │ ├── model.py │ │ ├── ring_attn_utils.py │ │ └── utils.py │ ├── trainer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── dpo_trainer.cpython-310.pyc │ │ │ ├── kd_trainer.cpython-310.pyc │ │ │ ├── kto_trainer.cpython-310.pyc │ │ │ ├── ppo_trainer.cpython-310.pyc │ │ │ ├── prm_trainer.cpython-310.pyc │ │ │ ├── rm_trainer.cpython-310.pyc │ │ │ └── sft_trainer.cpython-310.pyc │ │ ├── dpo_trainer.py │ │ ├── kd_trainer.py │ │ ├── kto_trainer.py │ │ ├── ppo_trainer.py │ │ ├── ppo_utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── experience_maker.cpython-310.pyc │ │ │ │ ├── kl_controller.cpython-310.pyc │ │ │ │ └── replay_buffer.cpython-310.pyc │ │ │ ├── experience_maker.bak.py │ │ │ ├── experience_maker.py │ │ │ ├── kl_controller.py │ │ │ └── replay_buffer.py │ │ ├── prm_trainer.py │ │ ├── ray │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── launcher.cpython-310.pyc │ │ │ │ ├── ppo_actor.cpython-310.pyc │ │ │ │ ├── ppo_critic.cpython-310.pyc │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ ├── vllm_engine.cpython-310.pyc │ │ │ │ └── vllm_worker_wrap.cpython-310.pyc │ │ │ ├── launcher.py │ │ │ ├── ppo_actor.py │ │ │ ├── ppo_critic.py │ │ │ ├── utils.py │ │ │ ├── vllm_engine.py │ │ │ └── vllm_worker_wrap.py │ │ ├── rm_trainer.py │ │ └── sft_trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── distributed_sampler.cpython-310.pyc │ │ ├── distributed_util.cpython-310.pyc │ │ ├── logging_utils.cpython-310.pyc │ │ ├── processor.cpython-310.pyc │ │ ├── remote_rm_utils.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ │ ├── deepspeed │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── deepspeed.cpython-310.pyc │ │ │ └── deepspeed_utils.cpython-310.pyc │ │ ├── deepspeed.py │ │ └── deepspeed_utils.py │ │ ├── distributed_sampler.py │ │ ├── distributed_util.py │ │ ├── logging_utils.py │ │ ├── processor.py │ │ ├── remote_rm_utils.py │ │ └── utils.py ├── ppo_wo_ray_single_remote.sh ├── pyproject.toml ├── ray_start.sh ├── ray_start_jjh.sh ├── remote_rm_start_jjh.sh ├── requirements.txt ├── run.sh ├── setup.py ├── test_rm.py ├── test_vllm_ray.py └── version.txt ├── README.md ├── assets ├── 1.txt ├── bamboogle-web-2.pdf ├── bamboogle_online.jpg ├── benchmarks-2.pdf ├── benchmarks.jpg └── benchmarks_visible.jpg ├── data ├── eval_results │ ├── 2wiki_llama.jsonl │ ├── 2wiki_qwen.jsonl │ ├── bamboogle_llama.jsonl │ ├── bamboogle_qwen.jsonl │ ├── hotpotqa_llama.jsonl │ ├── hotpotqa_qwen.jsonl │ ├── musique_llama.jsonl │ └── musique_qwen.jsonl ├── eval_set │ ├── 2wiki_500.jsonl │ ├── bamboogle.jsonl │ ├── hotpotqa_500.jsonl │ └── musique_500.jsonl └── training_set │ ├── stage_1.jsonl │ └── stage_2.jsonl ├── evaluation ├── eval_search_loacl.py ├── eval_search_online.py ├── gen_search.sh ├── metric_calc_gpt_as_judge.py └── metric_calc_rule.py ├── requirements.txt ├── scripts ├── llama_reinforce_plus_train_stage1.sh ├── llama_reinforce_plus_train_stage2.sh ├── qwen_grpo.sh ├── qwen_reinforce_plus_train.sh └── ray_start.sh ├── setup.py ├── train ├── jsonl2hf_dataset.py ├── reward_server_llama_stage1.py ├── reward_server_llama_stage2.py ├── reward_server_qwen_zero.py └── wiki_corpus_load.py ├── version.txt └── wiki_corpus_index_bulid ├── build_corpus_embedding.py ├── build_corpus_index.py ├── samples ├── filtered_kilt_sample_100.jsonl ├── train_kilt_full_page_sample_100.tsv └── train_np_title_abs_sample_100.tsv └── split_kilt_to_100.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/LICENSE -------------------------------------------------------------------------------- /OpenRLHF-RAG/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/CONTRIBUTING.md -------------------------------------------------------------------------------- /OpenRLHF-RAG/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/LICENSE -------------------------------------------------------------------------------- /OpenRLHF-RAG/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/README.md -------------------------------------------------------------------------------- /OpenRLHF-RAG/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/README_zh.md -------------------------------------------------------------------------------- /OpenRLHF-RAG/dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dockerfile/Dockerfile -------------------------------------------------------------------------------- /OpenRLHF-RAG/dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/docs/logo.png -------------------------------------------------------------------------------- /OpenRLHF-RAG/docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/docs/ppo_examples.md -------------------------------------------------------------------------------- /OpenRLHF-RAG/docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/docs/ray_architecture.png -------------------------------------------------------------------------------- /OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh-log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh-log -------------------------------------------------------------------------------- /OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh-log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh-log.txt -------------------------------------------------------------------------------- /OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh_1.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dpsk1_5b-1node-grpo-jjh_2.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/dpsk1_5b-1node-grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/dpsk1_5b-1node-grpo.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/docker_run.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/nvidia_docker_install.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/serve_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/serve_remote_rm.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_conditional_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_continue_pretrain_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_continue_pretrain_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_dpo_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_dpo_llama_34b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_dpo_llama_34b.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_dpo_ring_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_dpo_ring_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_iterative_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_iterative_dpo_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_kto_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_llama_slurm.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_ppo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_ppo_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_ppo_llama_with_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_ppo_llama_with_remote_rm.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_prm_mistral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_prm_mistral.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_reinforce_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_reinforce_llama_ray.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_rm_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_sft_llama.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/examples/scripts/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/examples/scripts/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/log/server/final-dpsk1_5b-rm1-1-2-grpo-len_29000tbs_512-rbs_128-sample_8-kl_0.001-warmup_0.0-ep_10000-plr_2e-6-temp1.0-30k-node0.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/log/server/final-dpsk1_5b-rm1-1-2-grpo-len_29000tbs_512-rbs_128-sample_8-kl_0.001-warmup_0.0-ep_10000-plr_2e-6-temp1.0-30k-node0.log -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf.egg-info/PKG-INFO -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf.egg-info/requires.txt -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | openrlhf 2 | -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/__pycache__/server_dpsk_tuple.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/__pycache__/server_dpsk_tuple.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/__pycache__/train_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/__pycache__/train_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/server_dpsk_tuple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/server_dpsk_tuple.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/server_dpsk_tuple_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/server_dpsk_tuple_check.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/server_rm_rag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/server_rm_rag.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/actor.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/loss.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/model.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/models/utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/prm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/prm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/experience_maker.bak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/experience_maker.bak.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/remote_rm_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/remote_rm_utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/ppo_wo_ray_single_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/ppo_wo_ray_single_remote.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/pyproject.toml -------------------------------------------------------------------------------- /OpenRLHF-RAG/ray_start.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/ray_start.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/ray_start_jjh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/ray_start_jjh.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/remote_rm_start_jjh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/remote_rm_start_jjh.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/requirements.txt -------------------------------------------------------------------------------- /OpenRLHF-RAG/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/run.sh -------------------------------------------------------------------------------- /OpenRLHF-RAG/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/setup.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/test_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/test_rm.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/test_vllm_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/OpenRLHF-RAG/test_vllm_ray.py -------------------------------------------------------------------------------- /OpenRLHF-RAG/version.txt: -------------------------------------------------------------------------------- 1 | 0.5.5.post2 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/README.md -------------------------------------------------------------------------------- /assets/1.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/bamboogle-web-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/assets/bamboogle-web-2.pdf -------------------------------------------------------------------------------- /assets/bamboogle_online.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/assets/bamboogle_online.jpg -------------------------------------------------------------------------------- /assets/benchmarks-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/assets/benchmarks-2.pdf -------------------------------------------------------------------------------- /assets/benchmarks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/assets/benchmarks.jpg -------------------------------------------------------------------------------- /assets/benchmarks_visible.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/assets/benchmarks_visible.jpg -------------------------------------------------------------------------------- /data/eval_results/2wiki_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/2wiki_llama.jsonl -------------------------------------------------------------------------------- /data/eval_results/2wiki_qwen.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/2wiki_qwen.jsonl -------------------------------------------------------------------------------- /data/eval_results/bamboogle_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/bamboogle_llama.jsonl -------------------------------------------------------------------------------- /data/eval_results/bamboogle_qwen.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/bamboogle_qwen.jsonl -------------------------------------------------------------------------------- /data/eval_results/hotpotqa_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/hotpotqa_llama.jsonl -------------------------------------------------------------------------------- /data/eval_results/hotpotqa_qwen.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/hotpotqa_qwen.jsonl -------------------------------------------------------------------------------- /data/eval_results/musique_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/musique_llama.jsonl -------------------------------------------------------------------------------- /data/eval_results/musique_qwen.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_results/musique_qwen.jsonl -------------------------------------------------------------------------------- /data/eval_set/2wiki_500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_set/2wiki_500.jsonl -------------------------------------------------------------------------------- /data/eval_set/bamboogle.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_set/bamboogle.jsonl -------------------------------------------------------------------------------- /data/eval_set/hotpotqa_500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_set/hotpotqa_500.jsonl -------------------------------------------------------------------------------- /data/eval_set/musique_500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/eval_set/musique_500.jsonl -------------------------------------------------------------------------------- /data/training_set/stage_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/training_set/stage_1.jsonl -------------------------------------------------------------------------------- /data/training_set/stage_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/data/training_set/stage_2.jsonl -------------------------------------------------------------------------------- /evaluation/eval_search_loacl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/evaluation/eval_search_loacl.py -------------------------------------------------------------------------------- /evaluation/eval_search_online.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/evaluation/eval_search_online.py -------------------------------------------------------------------------------- /evaluation/gen_search.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/evaluation/gen_search.sh -------------------------------------------------------------------------------- /evaluation/metric_calc_gpt_as_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/evaluation/metric_calc_gpt_as_judge.py -------------------------------------------------------------------------------- /evaluation/metric_calc_rule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/evaluation/metric_calc_rule.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/llama_reinforce_plus_train_stage1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/scripts/llama_reinforce_plus_train_stage1.sh -------------------------------------------------------------------------------- /scripts/llama_reinforce_plus_train_stage2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/scripts/llama_reinforce_plus_train_stage2.sh -------------------------------------------------------------------------------- /scripts/qwen_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/scripts/qwen_grpo.sh -------------------------------------------------------------------------------- /scripts/qwen_reinforce_plus_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/scripts/qwen_reinforce_plus_train.sh -------------------------------------------------------------------------------- /scripts/ray_start.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/scripts/ray_start.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/setup.py -------------------------------------------------------------------------------- /train/jsonl2hf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/train/jsonl2hf_dataset.py -------------------------------------------------------------------------------- /train/reward_server_llama_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/train/reward_server_llama_stage1.py -------------------------------------------------------------------------------- /train/reward_server_llama_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/train/reward_server_llama_stage2.py -------------------------------------------------------------------------------- /train/reward_server_qwen_zero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/train/reward_server_qwen_zero.py -------------------------------------------------------------------------------- /train/wiki_corpus_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/train/wiki_corpus_load.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.5.5.post2 -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/build_corpus_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/build_corpus_embedding.py -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/build_corpus_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/build_corpus_index.py -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/samples/filtered_kilt_sample_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/samples/filtered_kilt_sample_100.jsonl -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/samples/train_kilt_full_page_sample_100.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/samples/train_kilt_full_page_sample_100.tsv -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/samples/train_np_title_abs_sample_100.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/samples/train_np_title_abs_sample_100.tsv -------------------------------------------------------------------------------- /wiki_corpus_index_bulid/split_kilt_to_100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RUCAIBox/R1-Searcher/HEAD/wiki_corpus_index_bulid/split_kilt_to_100.py --------------------------------------------------------------------------------