├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── agent_r1 ├── llm_agent │ ├── __init__.py │ ├── generation.py │ └── tensor_helper.py ├── src │ ├── __init__.py │ ├── agent_dp_actor.py │ ├── agent_dp_critic.py │ ├── agent_ray_trainer.py │ ├── agent_reward_manager.py │ ├── agent_rl_dataset.py │ ├── config │ │ └── agent_trainer.yaml │ ├── core_algos.py │ ├── fsdp_workers.py │ ├── main_agent.py │ ├── metric_utils.py │ ├── reward.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── qa_em_and_format.py │ │ └── retool.py │ ├── sglang_rollout.py │ └── vllm_rollout_spmd.py ├── tool │ ├── base.py │ ├── envs │ │ ├── __init__.py │ │ ├── mathtir.py │ │ ├── nous.py │ │ └── retool.py │ ├── tools │ │ ├── __init__.py │ │ ├── python_tool.py │ │ ├── search_tool.py │ │ └── wiki_search_tool.py │ └── utils.py └── vllm_infer │ ├── __init__.py │ ├── chat.py │ ├── config.py │ └── run.py ├── docs ├── algorithm │ └── algorithm.md ├── getting_started │ ├── installation.md │ └── quickstart.md ├── inference │ └── inference.md └── tutorial │ ├── multihopqa.md │ └── retool.md ├── examples ├── data_preprocess │ ├── 2wikimultihopqa.py │ ├── gsm8k.py │ ├── hotpotqa.py │ ├── musique.py │ └── retool.py └── trainer │ ├── run_grpo_hotpotqa.sh │ ├── run_grpo_multihopqa.sh │ ├── run_grpo_retool.sh │ ├── run_ppo_hotpotqa.sh │ ├── run_ppo_multihopqa.sh │ ├── run_ppo_retool.sh │ └── run_rpp_hotpotqa.sh ├── image ├── Equation.png ├── agent.png ├── framework.png ├── grpo.jpg ├── ppo.jpg └── rpp.jpg └── scripts ├── hotpotqa_search └── process_hotpotqa.py ├── kilt_search_server ├── process_kilt.py ├── run_search_api.sh └── search_api.py ├── model_merge.sh ├── vllm_serve.sh └── wiki_search_server ├── process_wiki.py ├── queries.txt ├── requirements.txt ├── run_benchmark.sh ├── run_search_api.sh ├── search_api.py └── test_search_api.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/README.md -------------------------------------------------------------------------------- /agent_r1/llm_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/llm_agent/__init__.py -------------------------------------------------------------------------------- /agent_r1/llm_agent/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/llm_agent/generation.py -------------------------------------------------------------------------------- /agent_r1/llm_agent/tensor_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/llm_agent/tensor_helper.py -------------------------------------------------------------------------------- /agent_r1/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_r1/src/agent_dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/agent_dp_actor.py -------------------------------------------------------------------------------- /agent_r1/src/agent_dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/agent_dp_critic.py -------------------------------------------------------------------------------- /agent_r1/src/agent_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/agent_ray_trainer.py -------------------------------------------------------------------------------- /agent_r1/src/agent_reward_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/agent_reward_manager.py -------------------------------------------------------------------------------- /agent_r1/src/agent_rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/agent_rl_dataset.py -------------------------------------------------------------------------------- /agent_r1/src/config/agent_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/config/agent_trainer.yaml -------------------------------------------------------------------------------- /agent_r1/src/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/core_algos.py -------------------------------------------------------------------------------- /agent_r1/src/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/fsdp_workers.py -------------------------------------------------------------------------------- /agent_r1/src/main_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/main_agent.py -------------------------------------------------------------------------------- /agent_r1/src/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/metric_utils.py -------------------------------------------------------------------------------- /agent_r1/src/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward.py -------------------------------------------------------------------------------- /agent_r1/src/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward_score/__init__.py -------------------------------------------------------------------------------- /agent_r1/src/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward_score/gsm8k.py -------------------------------------------------------------------------------- /agent_r1/src/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward_score/math.py -------------------------------------------------------------------------------- /agent_r1/src/reward_score/qa_em_and_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward_score/qa_em_and_format.py -------------------------------------------------------------------------------- /agent_r1/src/reward_score/retool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/reward_score/retool.py -------------------------------------------------------------------------------- /agent_r1/src/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/sglang_rollout.py -------------------------------------------------------------------------------- /agent_r1/src/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/src/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /agent_r1/tool/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/base.py -------------------------------------------------------------------------------- /agent_r1/tool/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/envs/__init__.py -------------------------------------------------------------------------------- /agent_r1/tool/envs/mathtir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/envs/mathtir.py -------------------------------------------------------------------------------- /agent_r1/tool/envs/nous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/envs/nous.py -------------------------------------------------------------------------------- /agent_r1/tool/envs/retool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/envs/retool.py -------------------------------------------------------------------------------- /agent_r1/tool/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/tools/__init__.py -------------------------------------------------------------------------------- /agent_r1/tool/tools/python_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/tools/python_tool.py -------------------------------------------------------------------------------- /agent_r1/tool/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/tools/search_tool.py -------------------------------------------------------------------------------- /agent_r1/tool/tools/wiki_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/tools/wiki_search_tool.py -------------------------------------------------------------------------------- /agent_r1/tool/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/tool/utils.py -------------------------------------------------------------------------------- /agent_r1/vllm_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_r1/vllm_infer/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/vllm_infer/chat.py -------------------------------------------------------------------------------- /agent_r1/vllm_infer/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/vllm_infer/config.py -------------------------------------------------------------------------------- /agent_r1/vllm_infer/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/agent_r1/vllm_infer/run.py -------------------------------------------------------------------------------- /docs/algorithm/algorithm.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/algorithm/algorithm.md -------------------------------------------------------------------------------- /docs/getting_started/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/getting_started/installation.md -------------------------------------------------------------------------------- /docs/getting_started/quickstart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/getting_started/quickstart.md -------------------------------------------------------------------------------- /docs/inference/inference.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/inference/inference.md -------------------------------------------------------------------------------- /docs/tutorial/multihopqa.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/tutorial/multihopqa.md -------------------------------------------------------------------------------- /docs/tutorial/retool.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/docs/tutorial/retool.md -------------------------------------------------------------------------------- /examples/data_preprocess/2wikimultihopqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/data_preprocess/2wikimultihopqa.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /examples/data_preprocess/hotpotqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/data_preprocess/hotpotqa.py -------------------------------------------------------------------------------- /examples/data_preprocess/musique.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/data_preprocess/musique.py -------------------------------------------------------------------------------- /examples/data_preprocess/retool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/data_preprocess/retool.py -------------------------------------------------------------------------------- /examples/trainer/run_grpo_hotpotqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_grpo_hotpotqa.sh -------------------------------------------------------------------------------- /examples/trainer/run_grpo_multihopqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_grpo_multihopqa.sh -------------------------------------------------------------------------------- /examples/trainer/run_grpo_retool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_grpo_retool.sh -------------------------------------------------------------------------------- /examples/trainer/run_ppo_hotpotqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_ppo_hotpotqa.sh -------------------------------------------------------------------------------- /examples/trainer/run_ppo_multihopqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_ppo_multihopqa.sh -------------------------------------------------------------------------------- /examples/trainer/run_ppo_retool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_ppo_retool.sh -------------------------------------------------------------------------------- /examples/trainer/run_rpp_hotpotqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/examples/trainer/run_rpp_hotpotqa.sh -------------------------------------------------------------------------------- /image/Equation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/Equation.png -------------------------------------------------------------------------------- /image/agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/agent.png -------------------------------------------------------------------------------- /image/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/framework.png -------------------------------------------------------------------------------- /image/grpo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/grpo.jpg -------------------------------------------------------------------------------- /image/ppo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/ppo.jpg -------------------------------------------------------------------------------- /image/rpp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/image/rpp.jpg -------------------------------------------------------------------------------- /scripts/hotpotqa_search/process_hotpotqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/hotpotqa_search/process_hotpotqa.py -------------------------------------------------------------------------------- /scripts/kilt_search_server/process_kilt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/kilt_search_server/process_kilt.py -------------------------------------------------------------------------------- /scripts/kilt_search_server/run_search_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/kilt_search_server/run_search_api.sh -------------------------------------------------------------------------------- /scripts/kilt_search_server/search_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/kilt_search_server/search_api.py -------------------------------------------------------------------------------- /scripts/model_merge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/model_merge.sh -------------------------------------------------------------------------------- /scripts/vllm_serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/vllm_serve.sh -------------------------------------------------------------------------------- /scripts/wiki_search_server/process_wiki.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/process_wiki.py -------------------------------------------------------------------------------- /scripts/wiki_search_server/queries.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/queries.txt -------------------------------------------------------------------------------- /scripts/wiki_search_server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/requirements.txt -------------------------------------------------------------------------------- /scripts/wiki_search_server/run_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/run_benchmark.sh -------------------------------------------------------------------------------- /scripts/wiki_search_server/run_search_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/run_search_api.sh -------------------------------------------------------------------------------- /scripts/wiki_search_server/search_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/search_api.py -------------------------------------------------------------------------------- /scripts/wiki_search_server/test_search_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0russwest0/Agent-R1/HEAD/scripts/wiki_search_server/test_search_api.py --------------------------------------------------------------------------------