├── .gitignore ├── .gitmodules ├── .python-version ├── LICENSE ├── README.md ├── assets ├── docs │ ├── DAPO.md │ ├── agent_config.md │ ├── asyncRL.md │ ├── contribution.md │ ├── evaluation.md │ ├── install.md │ ├── sync_design.md │ ├── tool_server.md │ ├── training_guide.md │ ├── training_results.md │ ├── udpate_verl_version.md │ └── updates │ │ └── verltool_v0.6.0_upgrade.md └── imgs │ ├── logo.png │ ├── verl_tool_architecture.png │ └── wechat_group.jpg ├── benchmarks └── README.md ├── eval_service ├── README.md ├── app.py ├── config.py ├── model_service.py ├── scripts │ └── start_api_service.sh └── test │ ├── README.md │ ├── test_api.py │ └── test_api_mp.py ├── examples ├── data_preprocess │ ├── README.md │ ├── acecoder.py │ ├── acecoder_custom.py │ ├── deepcoder.py │ ├── deepmath.py │ ├── deepsearch.py │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── hellaswag.py │ ├── math_dataset.py │ ├── math_torl.py │ ├── mathcoder.py │ ├── mcp_universe.py │ ├── mmau_pro │ │ ├── compute_sequence_length.py │ │ ├── filter_out_long_prompt.py │ │ └── mmau_pro.py │ ├── nl2sql.sh │ ├── pixel_reasoner │ │ ├── infovqa.py │ │ ├── mvbench.py │ │ ├── prepare_train.py │ │ ├── tallyqa.py │ │ └── vstar.py │ ├── search_r1.py │ ├── skysql │ │ ├── download_skysql.sh │ │ ├── prepare_test.py │ │ ├── prepare_train.py │ │ └── sql.py │ ├── taco.py │ └── wikiQA.py └── train │ ├── README.md │ ├── acecoder │ ├── README.md │ ├── train_no_tool.sh │ ├── train_no_tool_dapo.sh │ └── train_with_tool.sh │ ├── deepsearch │ ├── README.md │ ├── eval.sh │ ├── train_4b.sh │ └── train_8b.sh │ ├── math_tir │ ├── README.md │ ├── train_1.5b_dapo.sh │ ├── train_1.5b_drgrpo.sh │ ├── train_1.5b_grpo.sh │ ├── train_1.5b_grpo_no_tool.sh │ ├── train_7b_grpo.sh │ ├── train_7b_grpo_megatron.sh │ └── train_7b_grpo_multi_node_slurm.sh │ ├── mathcoder │ ├── README.md │ ├── train.sh │ ├── train_1.5b.sh │ ├── train_mimo_7b.sh │ ├── train_mimo_7b_no_tool.sh │ └── train_mnode.sh │ ├── mcp_universe │ ├── README.md │ ├── create_data.sh │ ├── env.example.sh │ ├── eval_browser_automation.sh │ ├── eval_design_3d.sh │ ├── eval_financial_analysis.sh │ ├── eval_location_navigation.sh │ ├── eval_repository_management.sh │ ├── eval_web_search.sh │ ├── judge_test │ │ ├── README.md │ │ ├── autobrowser_web_search │ │ │ ├── build_answers.py │ │ │ ├── demo.sh │ │ │ ├── run.py │ │ │ └── run_batch.py │ │ └── financial_analysis │ │ │ └── run_finance_all.py │ ├── mcp_interface_test │ │ ├── discover_and_generate.sh │ │ ├── discover_tools.py │ │ ├── generate_cases_from_discovery.py │ │ ├── interface_runner.py │ │ ├── run_all.sh │ │ ├── test_eval_end2end.py │ │ ├── test_evaluator_llm_as_judge.py │ │ └── test_mcp_interface_call.py │ └── parquet_to_readable.py │ ├── mmau_pro │ └── train_omni_4ranks.sh │ ├── pixel_reasoner │ ├── README.md │ ├── eval.sh │ ├── train_3b.sh │ ├── train_dapo.sh │ ├── train_qwen25vl.sh │ └── train_qwen3vl.sh │ ├── search_r1 │ ├── README.md │ ├── reimplementation_tensorboard_records.0 │ ├── train_3b.sh │ ├── train_3b_dapo.sh │ ├── train_7b.sh │ └── train_7b_dapo.sh │ ├── skysql │ ├── README.md │ └── train_7b.sh │ ├── swe │ └── README.md │ └── wikiRL │ ├── train_wikiRL.sh │ └── wikiRL_server.sh ├── main.py ├── patches └── qwen_2_5_omni.patch ├── pyproject.toml ├── requirements.txt ├── scripts ├── train_commands.sh └── visualize_entropy.py └── verl_tool ├── README.md ├── __init__.py ├── agent_loop ├── __init__.py ├── agent_loop.py ├── verltool_agent_loop.py ├── vision_process.py └── vision_utils.py ├── servers ├── README.md ├── __init__.py ├── ray_utils.py ├── serve.py ├── tests │ ├── test_base.py │ ├── test_bash_terminal_tool.py │ ├── test_bing_search_tool.py │ ├── test_crop_tool.py │ ├── test_google_search_tool.py │ ├── test_ipython_efficiency.py │ ├── test_ipython_kernel.py │ ├── test_mcp_interface_dispatch.py │ ├── test_mcp_interface_tool.py │ ├── test_piston_server.py │ ├── test_piston_tool.py │ ├── test_python_code_tool.py │ ├── test_python_oj_tool.py │ ├── test_sandbox_fusion_tool.py │ ├── test_search_retrieval_tool.py │ ├── test_serp_search_tool.py │ ├── test_sql_tool.py │ ├── test_text_browser.py │ └── test_text_browser_multi.py ├── tool_server.py ├── tools │ ├── __init__.py │ ├── audio_crop.py │ ├── base.py │ ├── bash_terminal.py │ ├── bing_search.py │ ├── finish.py │ ├── google_search.py │ ├── ipython_code.py │ ├── mcp_interface.py │ ├── piston.py │ ├── pixel_reasoner.py │ ├── python_code.py │ ├── python_oj.py │ ├── sandbox_fusion.py │ ├── search_retrieval.py │ ├── sql.py │ ├── text_browser.py │ └── utils │ │ ├── bash_session.py │ │ ├── deepsearch_utils.py │ │ ├── ipython_tool.py │ │ ├── mcp_client.py │ │ ├── retrieval_server.py │ │ ├── sql_executor.py │ │ └── web_agent_utils.py └── utils.py ├── trainer ├── __init__.py ├── config │ ├── __init__.py │ ├── _generated_ppo_megatron_trainer.yaml │ ├── _generated_ppo_trainer.yaml │ ├── actor │ │ ├── actor.yaml │ │ ├── dp_actor.yaml │ │ └── megatron_actor.yaml │ ├── algorithm.py │ ├── config.py │ ├── critic │ │ ├── critic.yaml │ │ ├── dp_critic.yaml │ │ └── megatron_critic.yaml │ ├── data │ │ └── legacy_data.yaml │ ├── engine │ │ ├── fsdp.yaml │ │ └── megatron.yaml │ ├── evaluation.yaml │ ├── generation.yaml │ ├── model │ │ └── hf_model.yaml │ ├── npu_profile │ │ └── npu_profile.yaml │ ├── optim │ │ ├── fsdp.yaml │ │ └── megatron.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ ├── ref │ │ ├── dp_ref.yaml │ │ ├── megatron_ref.yaml │ │ └── ref.yaml │ ├── reward_model │ │ ├── dp_reward_model.yaml │ │ ├── megatron_reward_model.yaml │ │ └── reward_model.yaml │ ├── rollout │ │ └── rollout.yaml │ ├── sft_trainer.yaml │ ├── sft_trainer_engine.yaml │ └── verltool │ │ └── agent.yaml ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── metric_util.py │ ├── ray_trainer.py │ └── reward.py └── runtime_env.yaml ├── utils └── dataset │ └── audio_utils.py └── workers ├── __init__.py ├── reward_manager ├── __init__.py ├── acecoder.py ├── audio.py ├── deepsearch.py ├── mcp_universe_eval.py ├── pixel_reasoner.py ├── reward_score │ ├── __init__.py │ ├── torl_eval.py │ └── torl_math.py ├── search_r1_qa_em.py ├── simple_tir.py ├── sqlcoder.py ├── torl.py ├── utils.py └── wikiRL.py ├── rollout ├── __init__.py ├── replica.py └── vllm_rollout │ ├── __init__.py │ └── vllm_async_server.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/.gitmodules -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/README.md -------------------------------------------------------------------------------- /assets/docs/DAPO.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/DAPO.md -------------------------------------------------------------------------------- /assets/docs/agent_config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/agent_config.md -------------------------------------------------------------------------------- /assets/docs/asyncRL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/asyncRL.md -------------------------------------------------------------------------------- /assets/docs/contribution.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/contribution.md -------------------------------------------------------------------------------- /assets/docs/evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/evaluation.md -------------------------------------------------------------------------------- /assets/docs/install.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/install.md -------------------------------------------------------------------------------- /assets/docs/sync_design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/sync_design.md -------------------------------------------------------------------------------- /assets/docs/tool_server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/tool_server.md -------------------------------------------------------------------------------- /assets/docs/training_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/training_guide.md -------------------------------------------------------------------------------- /assets/docs/training_results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/training_results.md -------------------------------------------------------------------------------- /assets/docs/udpate_verl_version.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/udpate_verl_version.md -------------------------------------------------------------------------------- /assets/docs/updates/verltool_v0.6.0_upgrade.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/docs/updates/verltool_v0.6.0_upgrade.md -------------------------------------------------------------------------------- /assets/imgs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/imgs/logo.png -------------------------------------------------------------------------------- /assets/imgs/verl_tool_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/imgs/verl_tool_architecture.png -------------------------------------------------------------------------------- /assets/imgs/wechat_group.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/assets/imgs/wechat_group.jpg -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /eval_service/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/README.md -------------------------------------------------------------------------------- /eval_service/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/app.py -------------------------------------------------------------------------------- /eval_service/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/config.py -------------------------------------------------------------------------------- /eval_service/model_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/model_service.py -------------------------------------------------------------------------------- /eval_service/scripts/start_api_service.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/scripts/start_api_service.sh -------------------------------------------------------------------------------- /eval_service/test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/test/README.md -------------------------------------------------------------------------------- /eval_service/test/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/test/test_api.py -------------------------------------------------------------------------------- /eval_service/test/test_api_mp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/eval_service/test/test_api_mp.py -------------------------------------------------------------------------------- /examples/data_preprocess/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/README.md -------------------------------------------------------------------------------- /examples/data_preprocess/acecoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/acecoder.py -------------------------------------------------------------------------------- /examples/data_preprocess/acecoder_custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/acecoder_custom.py -------------------------------------------------------------------------------- /examples/data_preprocess/deepcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/deepcoder.py -------------------------------------------------------------------------------- /examples/data_preprocess/deepmath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/deepmath.py -------------------------------------------------------------------------------- /examples/data_preprocess/deepsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/deepsearch.py -------------------------------------------------------------------------------- /examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /examples/data_preprocess/math_torl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/math_torl.py -------------------------------------------------------------------------------- /examples/data_preprocess/mathcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/mathcoder.py -------------------------------------------------------------------------------- /examples/data_preprocess/mcp_universe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/mcp_universe.py -------------------------------------------------------------------------------- /examples/data_preprocess/mmau_pro/compute_sequence_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/mmau_pro/compute_sequence_length.py -------------------------------------------------------------------------------- /examples/data_preprocess/mmau_pro/filter_out_long_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/mmau_pro/filter_out_long_prompt.py -------------------------------------------------------------------------------- /examples/data_preprocess/mmau_pro/mmau_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/mmau_pro/mmau_pro.py -------------------------------------------------------------------------------- /examples/data_preprocess/nl2sql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/nl2sql.sh -------------------------------------------------------------------------------- /examples/data_preprocess/pixel_reasoner/infovqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/pixel_reasoner/infovqa.py -------------------------------------------------------------------------------- /examples/data_preprocess/pixel_reasoner/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/pixel_reasoner/mvbench.py -------------------------------------------------------------------------------- /examples/data_preprocess/pixel_reasoner/prepare_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/pixel_reasoner/prepare_train.py -------------------------------------------------------------------------------- /examples/data_preprocess/pixel_reasoner/tallyqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/pixel_reasoner/tallyqa.py -------------------------------------------------------------------------------- /examples/data_preprocess/pixel_reasoner/vstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/pixel_reasoner/vstar.py -------------------------------------------------------------------------------- /examples/data_preprocess/search_r1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/search_r1.py -------------------------------------------------------------------------------- /examples/data_preprocess/skysql/download_skysql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/skysql/download_skysql.sh -------------------------------------------------------------------------------- /examples/data_preprocess/skysql/prepare_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/skysql/prepare_test.py -------------------------------------------------------------------------------- /examples/data_preprocess/skysql/prepare_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/skysql/prepare_train.py -------------------------------------------------------------------------------- /examples/data_preprocess/skysql/sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/skysql/sql.py -------------------------------------------------------------------------------- /examples/data_preprocess/taco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/taco.py -------------------------------------------------------------------------------- /examples/data_preprocess/wikiQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/data_preprocess/wikiQA.py -------------------------------------------------------------------------------- /examples/train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/README.md -------------------------------------------------------------------------------- /examples/train/acecoder/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/acecoder/README.md -------------------------------------------------------------------------------- /examples/train/acecoder/train_no_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/acecoder/train_no_tool.sh -------------------------------------------------------------------------------- /examples/train/acecoder/train_no_tool_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/acecoder/train_no_tool_dapo.sh -------------------------------------------------------------------------------- /examples/train/acecoder/train_with_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/acecoder/train_with_tool.sh -------------------------------------------------------------------------------- /examples/train/deepsearch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/deepsearch/README.md -------------------------------------------------------------------------------- /examples/train/deepsearch/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/deepsearch/eval.sh -------------------------------------------------------------------------------- /examples/train/deepsearch/train_4b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/deepsearch/train_4b.sh -------------------------------------------------------------------------------- /examples/train/deepsearch/train_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/deepsearch/train_8b.sh -------------------------------------------------------------------------------- /examples/train/math_tir/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/README.md -------------------------------------------------------------------------------- /examples/train/math_tir/train_1.5b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_1.5b_dapo.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_1.5b_drgrpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_1.5b_drgrpo.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_1.5b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_1.5b_grpo.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_1.5b_grpo_no_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_1.5b_grpo_no_tool.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_7b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_7b_grpo.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_7b_grpo_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_7b_grpo_megatron.sh -------------------------------------------------------------------------------- /examples/train/math_tir/train_7b_grpo_multi_node_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/math_tir/train_7b_grpo_multi_node_slurm.sh -------------------------------------------------------------------------------- /examples/train/mathcoder/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/README.md -------------------------------------------------------------------------------- /examples/train/mathcoder/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/train.sh -------------------------------------------------------------------------------- /examples/train/mathcoder/train_1.5b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/train_1.5b.sh -------------------------------------------------------------------------------- /examples/train/mathcoder/train_mimo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/train_mimo_7b.sh -------------------------------------------------------------------------------- /examples/train/mathcoder/train_mimo_7b_no_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/train_mimo_7b_no_tool.sh -------------------------------------------------------------------------------- /examples/train/mathcoder/train_mnode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mathcoder/train_mnode.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/README.md -------------------------------------------------------------------------------- /examples/train/mcp_universe/create_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/create_data.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/env.example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/env.example.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_browser_automation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_browser_automation.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_design_3d.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_design_3d.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_financial_analysis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_financial_analysis.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_location_navigation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_location_navigation.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_repository_management.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_repository_management.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/eval_web_search.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/eval_web_search.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/README.md -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/autobrowser_web_search/build_answers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/autobrowser_web_search/build_answers.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/autobrowser_web_search/demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/autobrowser_web_search/demo.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/autobrowser_web_search/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/autobrowser_web_search/run.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/autobrowser_web_search/run_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/autobrowser_web_search/run_batch.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/judge_test/financial_analysis/run_finance_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/judge_test/financial_analysis/run_finance_all.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/discover_and_generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/discover_and_generate.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/discover_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/discover_tools.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/generate_cases_from_discovery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/generate_cases_from_discovery.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/interface_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/interface_runner.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/run_all.sh -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/test_eval_end2end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/test_eval_end2end.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/test_evaluator_llm_as_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/test_evaluator_llm_as_judge.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/mcp_interface_test/test_mcp_interface_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/mcp_interface_test/test_mcp_interface_call.py -------------------------------------------------------------------------------- /examples/train/mcp_universe/parquet_to_readable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mcp_universe/parquet_to_readable.py -------------------------------------------------------------------------------- /examples/train/mmau_pro/train_omni_4ranks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/mmau_pro/train_omni_4ranks.sh -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/pixel_reasoner/README.md -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/pixel_reasoner/eval.sh -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/train_3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/pixel_reasoner/train_3b.sh -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/train_dapo.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/train_qwen25vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/pixel_reasoner/train_qwen25vl.sh -------------------------------------------------------------------------------- /examples/train/pixel_reasoner/train_qwen3vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/pixel_reasoner/train_qwen3vl.sh -------------------------------------------------------------------------------- /examples/train/search_r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/README.md -------------------------------------------------------------------------------- /examples/train/search_r1/reimplementation_tensorboard_records.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/reimplementation_tensorboard_records.0 -------------------------------------------------------------------------------- /examples/train/search_r1/train_3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/train_3b.sh -------------------------------------------------------------------------------- /examples/train/search_r1/train_3b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/train_3b_dapo.sh -------------------------------------------------------------------------------- /examples/train/search_r1/train_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/train_7b.sh -------------------------------------------------------------------------------- /examples/train/search_r1/train_7b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/search_r1/train_7b_dapo.sh -------------------------------------------------------------------------------- /examples/train/skysql/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/skysql/README.md -------------------------------------------------------------------------------- /examples/train/skysql/train_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/skysql/train_7b.sh -------------------------------------------------------------------------------- /examples/train/swe/README.md: -------------------------------------------------------------------------------- 1 | ## SWE 2 | coming soon ~ -------------------------------------------------------------------------------- /examples/train/wikiRL/train_wikiRL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/wikiRL/train_wikiRL.sh -------------------------------------------------------------------------------- /examples/train/wikiRL/wikiRL_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/examples/train/wikiRL/wikiRL_server.sh -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/main.py -------------------------------------------------------------------------------- /patches/qwen_2_5_omni.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/patches/qwen_2_5_omni.patch -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/train_commands.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/scripts/train_commands.sh -------------------------------------------------------------------------------- /scripts/visualize_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/scripts/visualize_entropy.py -------------------------------------------------------------------------------- /verl_tool/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/README.md -------------------------------------------------------------------------------- /verl_tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/agent_loop/__init__.py -------------------------------------------------------------------------------- /verl_tool/agent_loop/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/agent_loop/agent_loop.py -------------------------------------------------------------------------------- /verl_tool/agent_loop/verltool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/agent_loop/verltool_agent_loop.py -------------------------------------------------------------------------------- /verl_tool/agent_loop/vision_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/agent_loop/vision_process.py -------------------------------------------------------------------------------- /verl_tool/agent_loop/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/agent_loop/vision_utils.py -------------------------------------------------------------------------------- /verl_tool/servers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/README.md -------------------------------------------------------------------------------- /verl_tool/servers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/servers/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/ray_utils.py -------------------------------------------------------------------------------- /verl_tool/servers/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/serve.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_base.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_bash_terminal_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_bash_terminal_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_bing_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_bing_search_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_crop_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_crop_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_google_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_google_search_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_ipython_efficiency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_ipython_efficiency.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_ipython_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_ipython_kernel.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_mcp_interface_dispatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_mcp_interface_dispatch.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_mcp_interface_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_mcp_interface_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_piston_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_piston_server.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_piston_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_piston_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_python_code_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_python_code_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_python_oj_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_python_oj_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_sandbox_fusion_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_sandbox_fusion_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_search_retrieval_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_search_retrieval_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_serp_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_serp_search_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_sql_tool.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_text_browser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_text_browser.py -------------------------------------------------------------------------------- /verl_tool/servers/tests/test_text_browser_multi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tests/test_text_browser_multi.py -------------------------------------------------------------------------------- /verl_tool/servers/tool_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tool_server.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ALL_TOOLS, get_tool_cls, set_use_tqdm -------------------------------------------------------------------------------- /verl_tool/servers/tools/audio_crop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/audio_crop.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/base.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/bash_terminal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/bash_terminal.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/bing_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/bing_search.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/finish.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/finish.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/google_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/google_search.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/ipython_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/ipython_code.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/mcp_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/mcp_interface.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/piston.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/piston.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/pixel_reasoner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/pixel_reasoner.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/python_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/python_code.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/python_oj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/python_oj.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/sandbox_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/sandbox_fusion.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/search_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/search_retrieval.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/sql.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/text_browser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/text_browser.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/bash_session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/bash_session.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/deepsearch_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/deepsearch_utils.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/ipython_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/ipython_tool.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/mcp_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/mcp_client.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/retrieval_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/retrieval_server.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/sql_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/sql_executor.py -------------------------------------------------------------------------------- /verl_tool/servers/tools/utils/web_agent_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/tools/utils/web_agent_utils.py -------------------------------------------------------------------------------- /verl_tool/servers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/servers/utils.py -------------------------------------------------------------------------------- /verl_tool/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/__init__.py -------------------------------------------------------------------------------- /verl_tool/trainer/config/_generated_ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/_generated_ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/_generated_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/_generated_ppo_trainer.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/actor/actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/actor/actor.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/actor/dp_actor.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/actor/megatron_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/actor/megatron_actor.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/algorithm.py -------------------------------------------------------------------------------- /verl_tool/trainer/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/config.py -------------------------------------------------------------------------------- /verl_tool/trainer/config/critic/critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/critic/critic.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/critic/dp_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/critic/dp_critic.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/critic/megatron_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/critic/megatron_critic.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/data/legacy_data.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/data/legacy_data.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/engine/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/engine/fsdp.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/engine/megatron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/engine/megatron.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/generation.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/model/hf_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/model/hf_model.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/npu_profile/npu_profile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/npu_profile/npu_profile.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/optim/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/optim/fsdp.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/optim/megatron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/optim/megatron.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/ref/dp_ref.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/ref/megatron_ref.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/ref/ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/ref/ref.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/reward_model/dp_reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/reward_model/dp_reward_model.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/reward_model/megatron_reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/reward_model/megatron_reward_model.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/reward_model/reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/reward_model/reward_model.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/rollout/rollout.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/rollout/rollout.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/sft_trainer_engine.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/sft_trainer_engine.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/config/verltool/agent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/config/verltool/agent.yaml -------------------------------------------------------------------------------- /verl_tool/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/main_ppo.py -------------------------------------------------------------------------------- /verl_tool/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/trainer/ppo/metric_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/ppo/metric_util.py -------------------------------------------------------------------------------- /verl_tool/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /verl_tool/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/ppo/reward.py -------------------------------------------------------------------------------- /verl_tool/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /verl_tool/utils/dataset/audio_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/utils/dataset/audio_utils.py -------------------------------------------------------------------------------- /verl_tool/workers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/acecoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/acecoder.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/audio.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/deepsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/deepsearch.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/mcp_universe_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/mcp_universe_eval.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/pixel_reasoner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/pixel_reasoner.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/reward_score/__init__.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/reward_score/torl_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/reward_score/torl_eval.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/reward_score/torl_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/reward_score/torl_math.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/search_r1_qa_em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/search_r1_qa_em.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/simple_tir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/simple_tir.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/sqlcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/sqlcoder.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/torl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/torl.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/utils.py -------------------------------------------------------------------------------- /verl_tool/workers/reward_manager/wikiRL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/reward_manager/wikiRL.py -------------------------------------------------------------------------------- /verl_tool/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/workers/rollout/replica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/rollout/replica.py -------------------------------------------------------------------------------- /verl_tool/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl_tool/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /verl_tool/workers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/verl-tool/HEAD/verl_tool/workers/utils.py --------------------------------------------------------------------------------