├── docs ├── .nojekyll └── static │ ├── pdfs │ └── sample.pdf │ └── images │ ├── cases.png │ ├── hf-logo.png │ ├── datapipe.png │ ├── framework.png │ ├── eval_prior.png │ ├── leaderboard.png │ └── main_result.png ├── search ├── __init__.py ├── llm_agent │ └── __init__.py ├── install.sh ├── retrieval │ ├── retrieval_request.py │ ├── build_index.sh │ ├── build_index_vllm_api.sh │ └── read_npz_simple.py └── retrieval_launch.sh ├── verl ├── version │ └── version ├── trainer │ ├── runtime_env.yaml │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── main_ppo.cpython-310.pyc │ │ └── constants_ppo.cpython-310.pyc │ ├── ppo │ │ ├── __pycache__ │ │ │ ├── reward.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── core_algos.cpython-310.pyc │ │ │ ├── metric_utils.cpython-310.pyc │ │ │ └── ray_trainer.cpython-310.pyc │ │ └── __init__.py │ ├── config │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── algorithm.cpython-310.pyc │ │ ├── evaluation.yaml │ │ ├── __init__.py │ │ ├── generation.yaml │ │ ├── sft_trainer.yaml │ │ └── algorithm.py │ ├── __init__.py │ ├── constants_ppo.py │ └── main_eval.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-313.pyc │ ├── protocol.cpython-310.pyc │ ├── protocol.cpython-313.pyc │ └── base_config.cpython-310.pyc ├── utils │ ├── __pycache__ │ │ ├── fs.cpython-310.pyc │ │ ├── config.cpython-310.pyc │ │ ├── device.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── __init__.cpython-310.pyc │ │ ├── hdfs_io.cpython-310.pyc │ │ ├── tracking.cpython-310.pyc │ │ ├── ulysses.cpython-310.pyc │ │ ├── fsdp_utils.cpython-310.pyc │ │ ├── net_utils.cpython-310.pyc │ │ ├── ray_utils.cpython-310.pyc │ │ ├── tokenizer.cpython-310.pyc │ │ ├── vllm_utils.cpython-310.pyc │ │ ├── flops_counter.cpython-310.pyc │ │ ├── import_utils.cpython-310.pyc │ │ ├── logging_utils.cpython-310.pyc │ │ ├── py_functional.cpython-310.pyc │ │ ├── torch_dtypes.cpython-310.pyc │ │ ├── seqlen_balancing.cpython-310.pyc │ │ ├── torch_functional.cpython-310.pyc │ │ └── activation_offload.cpython-310.pyc │ ├── metric │ │ ├── __pycache__ │ │ │ ├── utils.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ │ ├── __init__.py │ │ └── utils.py │ ├── dataset │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── sampler.cpython-310.pyc │ │ │ ├── rl_dataset.cpython-310.pyc │ │ │ ├── rm_dataset.cpython-310.pyc │ │ │ └── sft_dataset.cpython-310.pyc │ │ ├── __init__.py │ │ ├── README.md │ │ └── sampler.py │ ├── debug │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ ├── __init__.py │ │ └── performance.py │ ├── logger │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── aggregate_logger.cpython-310.pyc │ │ └── __init__.py │ ├── profiler │ │ ├── __pycache__ │ │ │ ├── config.cpython-310.pyc │ │ │ ├── profile.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── performance.cpython-310.pyc │ │ ├── empty_annotations.py │ │ ├── __init__.py │ │ └── config.py │ ├── reward_score │ │ ├── __pycache__ │ │ │ ├── gsm8k.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── search_r1_like_qa_em.cpython-310.pyc │ │ │ ├── search_r1_like_qa_em_s3.cpython-310.pyc │ │ │ └── search_r1_like_qa_em_s4.cpython-310.pyc │ │ ├── prime_code │ │ │ ├── README.md │ │ │ └── utils.py │ │ ├── math_batch.py │ │ ├── geo3k.py │ │ ├── math_verify.py │ │ └── gsm8k.py │ ├── checkpoint │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── checkpoint_manager.cpython-310.pyc │ │ │ └── fsdp_checkpoint_manager.cpython-310.pyc │ │ └── __init__.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── sequence_parallel.py │ │ ├── dist_checkpointing.py │ │ ├── pipeline_parallel.py │ │ └── optimizer.py │ ├── experimental │ │ └── __init__.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── __init__.py │ ├── logging_utils.py │ ├── kernel │ │ └── __init__.py │ ├── distributed.py │ ├── net_utils.py │ ├── config.py │ ├── torch_dtypes.py │ └── device.py ├── tools │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── schemas.cpython-310.pyc │ │ ├── base_tool.cpython-310.pyc │ │ └── search_tool.cpython-310.pyc │ ├── utils │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── tool_registry.cpython-310.pyc │ │ │ └── search_r1_like_utils.cpython-310.pyc │ │ ├── __init__.py │ │ └── mcp_clients │ │ │ └── utils.py │ ├── __init__.py │ ├── mcp_search_tool.py │ └── schemas.py ├── interactions │ ├── __pycache__ │ │ ├── base.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ ├── utils │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── interaction_registry.cpython-310.pyc │ │ └── __init__.py │ ├── __init__.py │ └── base.py ├── models │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── registry.cpython-310.pyc │ ├── transformers │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── npu_patch.cpython-310.pyc │ │ │ └── monkey_patch.cpython-310.pyc │ │ ├── __init__.py │ │ └── npu_patch.py │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── checkpoint_utils │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ └── layers │ │ │ ├── __init__.py │ │ │ └── parallel_rmsnorm.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── checkpoint_utils │ │ │ └── __init__.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_rmsnorm.py │ │ │ └── parallel_linear.py │ │ │ └── __init__.py │ ├── mcore │ │ ├── __init__.py │ │ ├── qwen2_5_vl │ │ │ └── __init__.py │ │ └── mbridge.py │ ├── README.md │ ├── registry.py │ └── weight_loader_registry.py ├── workers │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── fsdp_workers.cpython-310.pyc │ ├── actor │ │ ├── __pycache__ │ │ │ ├── base.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── dp_actor.cpython-310.pyc │ │ ├── __init__.py │ │ └── base.py │ ├── critic │ │ ├── __pycache__ │ │ │ ├── base.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── dp_critic.cpython-310.pyc │ │ ├── __init__.py │ │ └── base.py │ ├── rollout │ │ ├── __pycache__ │ │ │ ├── base.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── schemas.cpython-310.pyc │ │ │ └── hf_rollout.cpython-310.pyc │ │ ├── naive │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── naive_rollout.cpython-310.pyc │ │ │ └── __init__.py │ │ ├── sglang_rollout │ │ │ ├── __pycache__ │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── sglang_rollout.cpython-310.pyc │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── vllm_rollout │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── vllm_rollout_spmd.cpython-310.pyc │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── base.py │ ├── reward_manager │ │ ├── __pycache__ │ │ │ ├── dapo.cpython-310.pyc │ │ │ ├── batch.cpython-310.pyc │ │ │ ├── naive.cpython-310.pyc │ │ │ ├── prime.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── registry.cpython-310.pyc │ │ ├── __init__.py │ │ └── registry.py │ ├── sharding_manager │ │ ├── __pycache__ │ │ │ ├── base.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── fsdp_vllm.cpython-310.pyc │ │ │ ├── fsdp_sglang.cpython-310.pyc │ │ │ └── fsdp_ulysses.cpython-310.pyc │ │ ├── __init__.py │ │ ├── base.py │ │ └── fsdp_ulysses.py │ ├── __init__.py │ └── reward_model │ │ ├── __init__.py │ │ ├── megatron │ │ └── __init__.py │ │ └── base.py ├── third_party │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── sglang │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── parallel_state.cpython-310.pyc │ │ └── __init__.py │ ├── vllm │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ └── __init__.py │ └── __init__.py ├── single_controller │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── ray │ │ ├── __pycache__ │ │ │ ├── base.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ │ └── __init__.py │ ├── base │ │ ├── __pycache__ │ │ │ ├── worker.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── decorator.cpython-310.pyc │ │ │ └── worker_group.cpython-310.pyc │ │ ├── register_center │ │ │ ├── __pycache__ │ │ │ │ ├── ray.cpython-310.pyc │ │ │ │ └── __init__.cpython-310.pyc │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ └── worker_group.py │ │ └── __init__.py │ └── __init__.py ├── experimental │ ├── __init__.py │ └── agent_loop │ │ ├── __init__.py │ │ └── single_turn_agent_loop.py ├── model_merger │ ├── __init__.py │ └── __main__.py ├── __init__.py └── base_config.py ├── .gitignore ├── requirements-npu.txt ├── utils ├── __init__.py └── install_vllm_sglang_mcore.sh └── scripts └── train_grpo.sh /docs/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /search/llm_agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.4.1.dev 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | *.pyw 6 | *.pyz 7 | *.pywz 8 | *.pyzz -------------------------------------------------------------------------------- /docs/static/pdfs/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/pdfs/sample.pdf -------------------------------------------------------------------------------- /docs/static/images/cases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/cases.png -------------------------------------------------------------------------------- /docs/static/images/hf-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/hf-logo.png -------------------------------------------------------------------------------- /docs/static/images/datapipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/datapipe.png -------------------------------------------------------------------------------- /docs/static/images/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/framework.png -------------------------------------------------------------------------------- /docs/static/images/eval_prior.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/eval_prior.png -------------------------------------------------------------------------------- /docs/static/images/leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/leaderboard.png -------------------------------------------------------------------------------- /docs/static/images/main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/main_result.png -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | -------------------------------------------------------------------------------- /verl/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/__init__.cpython-313.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/__init__.cpython-313.pyc -------------------------------------------------------------------------------- /verl/__pycache__/protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/protocol.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/protocol.cpython-313.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/protocol.cpython-313.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/fs.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/base_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/base_config.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/device.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/device.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/schemas.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/schemas.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/hdfs_io.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/hdfs_io.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tracking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/tracking.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/interactions/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/base_tool.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/base_tool.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/search_tool.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/search_tool.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/main_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/main_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fsdp_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/net_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/net_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ray_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/ray_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/vllm_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/vllm_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/flops_counter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/flops_counter.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/py_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/py_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_dtypes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/metric/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/metric/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/interactions/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/constants_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/constants_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/torch_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/sampler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/sampler.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/debug/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/logger/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/metric/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/metric/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/profiler/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/profiler/__pycache__/profile.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/profile.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/config/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/config/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/config/__pycache__/algorithm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/config/__pycache__/algorithm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/activation_offload.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/activation_offload.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/profiler/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/schemas.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/schemas.cpython-310.pyc -------------------------------------------------------------------------------- /verl/interactions/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/ray/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/sglang/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/sglang/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/utils/__pycache__/tool_registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/tool_registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/profiler/__pycache__/performance.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/performance.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/transformers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/transformers/__pycache__/npu_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/npu_patch.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/decorator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/utils/__pycache__/search_r1_like_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/search_r1_like_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/sglang/__pycache__/parallel_state.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/sglang/__pycache__/parallel_state.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_sglang.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_sglang.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/interactions/utils/__pycache__/interaction_registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/utils/__pycache__/interaction_registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/search_r1_like_qa_em.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s3.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s4.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc -------------------------------------------------------------------------------- /search/install.sh: -------------------------------------------------------------------------------- 1 | 2 | pip install transformers datasets pyserini 3 | 4 | ## install the gpu version faiss to guarantee efficient RL rollout 5 | conda install -c pytorch -c nvidia faiss-gpu=1.8.0 6 | 7 | ## API function 8 | pip install uvicorn fastapi sentence_transformers==3.3.1 -------------------------------------------------------------------------------- /requirements-npu.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | hydra-core 7 | numpy<2.0.0 8 | pandas 9 | peft 10 | pyarrow>=15.0.0 11 | pybind11 12 | pylatexenc 13 | tensordict<=0.6.2 14 | transformers==4.52.4 15 | ray==2.46.0 16 | wandb 17 | mathruler 18 | torchdata 19 | einops 20 | qwen_vl_utils 21 | torchvision==0.20.1 22 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_init: 13 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 14 | timeline_json_file: null 15 | -------------------------------------------------------------------------------- /search/retrieval/retrieval_request.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | # URL for your local FastAPI server 4 | url = "http://127.0.0.1:8000/retrieve" 5 | 6 | # Example payload 7 | payload = { 8 | "queries": ["What is the capital of France?", "Explain neural networks."] * 200, 9 | "topk": 5, 10 | "return_scores": True 11 | } 12 | 13 | # Send POST request 14 | response = requests.post(url, json=payload) 15 | 16 | # Raise an exception if the request failed 17 | response.raise_for_status() 18 | 19 | # Get the JSON response 20 | retrieved_data = response.json() 21 | 22 | print("Response from server:") 23 | print(retrieved_data) 24 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /search/retrieval_launch.sh: -------------------------------------------------------------------------------- 1 | 2 | file_path=${DATA_DIR}/wiki-18 3 | index_file=$file_path/e5_Flat.index 4 | corpus_file=$file_path/wiki-18.jsonl 5 | retriever_name=e5 6 | retriever_path=${MODEL_DIR}/e5-base-v2 7 | 8 | python scripts/runs/reseek/reseek_search/search/retrieval_server.py --index_path $index_file \ 9 | --corpus_path $corpus_file \ 10 | --topk 3 \ 11 | --retriever_name $retriever_name \ 12 | --retriever_model $retriever_path \ 13 | --faiss_gpu 14 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/model_merger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | from .sglang_rollout import SGLangRollout 15 | 16 | __all__ = ["SGLangRollout"] 17 | -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import reduce_metrics 16 | 17 | __all__ = ["reduce_metrics"] 18 | -------------------------------------------------------------------------------- /verl/interactions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | 17 | __all__ = ["NaiveRollout"] 18 | -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | 17 | __all__ = ["BasePPORewardModel"] 18 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | 17 | __all__ = ["MegatronRewardModel"] 18 | -------------------------------------------------------------------------------- /verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .agent_loop import AgentLoopBase, AgentLoopManager 16 | 17 | __all__ = ["AgentLoopBase", "AgentLoopManager"] 18 | -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # For new features please develop in verl/utils/profiler/ 17 | from ..profiler import * # noqa 18 | -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- 1 | ## LiveCodeBench 2 | 3 | ### Introduction 4 | [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench) provides holistic and contamination-free evaluation of coding capabilities of LLMs. Particularly, LiveCodeBench continuously collects new problems over time from contests across three competition platforms -- LeetCode, AtCoder, and CodeForces. 5 | 6 | ### How to reproduce 7 | Our evaluation is grounded on the version found in LiveCodeBench. 8 | > **Installation** 9 | ```bash 10 | # Make sure the CUDA version > 12.0. 11 | pip install -r requirements.txt 12 | pip install flash-attn --no-build-isolation 13 | ``` 14 | 15 | ### Acknowleage 16 | Thank you to the [LiveCodeBench](https://livecodebench.github.io/leaderboard.html) team for their contributions to the open-source community. -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .hf_rollout import HFRollout 17 | from .naive import NaiveRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | 19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"] 20 | -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"] 19 | -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .algorithm import AlgoConfig, FilterGroupsConfig, KLControlConfig, PFPPOConfig 16 | 17 | __all__ = [ 18 | "AlgoConfig", 19 | "FilterGroupsConfig", 20 | "KLControlConfig", 21 | "PFPPOConfig", 22 | ] 23 | -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # This file is deprecated, for new features please develop in profiler/performance.py 17 | from verl.utils.profiler.performance import simple_timer, reduce_timing # noqa 18 | -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config, tokenizer 16 | from .config import omega_conf_to_dataclass 17 | from .tokenizer import hf_processor, hf_tokenizer 18 | 19 | __all__ = tokenizer.__all__ + config.__all__ + ["hf_processor", "hf_tokenizer", "omega_conf_to_dataclass"] 20 | -------------------------------------------------------------------------------- /verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | PPO_RAY_RUNTIME_ENV = { 16 | "env_vars": { 17 | "TOKENIZERS_PARALLELISM": "true", 18 | "NCCL_DEBUG": "WARN", 19 | "VLLM_LOGGING_LEVEL": "WARN", 20 | "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true", 21 | }, 22 | } 23 | -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .registry import get_mcore_forward_fn, get_mcore_weight_converter, hf_to_mcore_config, init_mcore_model 17 | 18 | __all__ = ["hf_to_mcore_config", "init_mcore_model", "get_mcore_forward_fn", "get_mcore_weight_converter"] 19 | -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # Copyright (c) 2024 Alibaba PAI Team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | from .model import Qwen2_5VLModel 19 | from .vision_config import get_vision_model_config, get_vision_projection_config 20 | 21 | __all__ = ["Qwen2_5VLModel", "get_vision_model_config", "get_vision_projection_config"] 22 | -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | 17 | from verl import DataProto 18 | 19 | __all__ = ["BaseRollout"] 20 | 21 | 22 | class BaseRollout(ABC): 23 | """Base class for rollout.""" 24 | 25 | @abstractmethod 26 | def generate_sequences(self, prompts: DataProto) -> DataProto: 27 | """Generate sequences""" 28 | pass 29 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from . import base 17 | from .base import * 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | # Note(haibin.lin): single_controller.__version__ is deprecated 22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f: 23 | __version__ = f.read().strip() 24 | 25 | 26 | __all__ = base.__all__ 27 | -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import ( 16 | RayClassWithInitArgs, 17 | RayResourcePool, 18 | RayWorkerGroup, 19 | create_colocated_worker_cls, 20 | create_colocated_worker_cls_fused, 21 | ) 22 | 23 | __all__ = [ 24 | "RayClassWithInitArgs", 25 | "RayResourcePool", 26 | "RayWorkerGroup", 27 | "create_colocated_worker_cls", 28 | "create_colocated_worker_cls_fused", 29 | ] 30 | -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .aggregate_logger import ( 17 | DecoratorLoggerBase, 18 | LocalLogger, 19 | log_with_rank, 20 | print_rank_0, 21 | print_with_rank, 22 | print_with_rank_and_timer, 23 | ) 24 | 25 | __all__ = [ 26 | "LocalLogger", 27 | "DecoratorLoggerBase", 28 | "print_rank_0", 29 | "print_with_rank", 30 | "print_with_rank_and_timer", 31 | "log_with_rank", 32 | ] 33 | -------------------------------------------------------------------------------- /verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | try: 17 | from mbridge import AutoBridge 18 | from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model 19 | except ImportError: 20 | print("mbridge package not found. Please install mbridge with `pip install verl[mcore]` or `pip install mbridge`") 21 | raise 22 | 23 | __all__ = ["AutoBridge", "make_value_model", "freeze_moe_router"] 24 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | 20 | __all__ = [ 21 | "ParallelQwen2Attention", 22 | "ParallelQwen2DecoderLayer", 23 | "ParallelQwen2DecoderLayerRmPad", 24 | "ParallelQwen2MLP", 25 | "ParallelQwen2RMSNorm", 26 | ] 27 | -------------------------------------------------------------------------------- /search/retrieval/build_index.sh: -------------------------------------------------------------------------------- 1 | corpus_file=/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl # jsonl 2 | save_dir=/group/40077/shyuli/datasets/RL/wiki 3 | retriever_name=conan 4 | retriever_model=/group/40077/shyuli/models/ours/embedding/conan-0827/ckpts 5 | # /group/40077/shyuli/models/embedding/bge-large-en-v1.5 6 | # /group/40077/shyuli/models/ours/embedding/conan-0827/ckpts 7 | # /group/40077/shyuli/models/embedding/e5-base-v2 8 | # /group/40077/shyuli/models/embedding/qwen/Qwen3-Embedding-0.6B 9 | # 10 | # 11 | # 12 | # 13 | 14 | # change faiss_type to HNSW32/64/128 for ANN indexing 15 | # change retriever_name to bm25 for BM25 indexing 16 | # 使用多 GPU 进行 sentence_transformers 编码 17 | CUDA_VISIBLE_DEVICES=0,1,2,3 python index_builder.py \ 18 | --retrieval_method $retriever_name \ 19 | --model_path $retriever_model \ 20 | --corpus_path $corpus_file \ 21 | --save_dir $save_dir \ 22 | --use_fp16 \ 23 | --max_length 256 \ 24 | --batch_size 64 \ 25 | --pooling_method mean \ 26 | --faiss_type Flat \ 27 | --save_embedding \ 28 | --embedding_path /group/40077/shyuli/datasets/RL/wiki/emb_conan_slices -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Individual Contributor: Mert Unsal 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .math import compute_score 16 | 17 | 18 | def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos): 19 | """ 20 | This is a demonstration of how the batched reward function should look like. 21 | Typically, you want to use batched reward to speed up the process with parallelization 22 | """ 23 | return [ 24 | compute_score(solution_str, ground_truth) for solution_str, ground_truth in zip(solution_strs, ground_truths) 25 | ] 26 | -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | import torch 19 | 20 | 21 | def set_basic_config(level): 22 | """ 23 | This function sets the global logging format and level. It will be called when import verl 24 | """ 25 | logging.basicConfig(format="%(levelname)s:%(asctime)s:%(message)s", level=level) 26 | 27 | 28 | def log_to_file(string): 29 | print(string) 30 | if os.path.isdir("logs"): 31 | with open(f"logs/log_{torch.distributed.get_rank()}", "a+") as f: 32 | f.write(string + "\n") 33 | -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __init__(self): 23 | self.timing = {} 24 | 25 | def __enter__(self): 26 | pass 27 | 28 | def __exit__(self, exc_type, exc_value, traceback): 29 | pass 30 | 31 | def preprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 | 34 | def postprocess_data(self, data: DataProto) -> DataProto: 35 | return data 36 | -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .registry import get_reward_manager_cls, register # noqa: I001 16 | from .batch import BatchRewardManager 17 | from .dapo import DAPORewardManager 18 | from .naive import NaiveRewardManager 19 | from .prime import PrimeRewardManager 20 | 21 | # Note(haibin.lin): no need to include all reward managers here in case of complicated dependencies 22 | __all__ = [ 23 | "BatchRewardManager", 24 | "DAPORewardManager", 25 | "NaiveRewardManager", 26 | "PrimeRewardManager", 27 | "register", 28 | "get_reward_manager_cls", 29 | ] 30 | -------------------------------------------------------------------------------- /verl/utils/dataset/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Amazon.com Inc and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from abc import abstractmethod 15 | from collections.abc import Sized 16 | 17 | from omegaconf import DictConfig 18 | from torch.utils.data import Sampler 19 | 20 | from verl import DataProto 21 | 22 | 23 | class AbstractSampler(Sampler[int]): 24 | @abstractmethod 25 | def __init__( 26 | self, 27 | data_source: Sized, 28 | config: DictConfig, 29 | ): 30 | pass 31 | 32 | 33 | class AbstractCurriculumSampler(AbstractSampler): 34 | @abstractmethod 35 | def update(self, batch: DataProto) -> None: 36 | pass 37 | -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | import torch 21 | 22 | from verl import DataProto 23 | 24 | __all__ = ["BasePPOCritic"] 25 | 26 | 27 | class BasePPOCritic(ABC): 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /verl/utils/profiler/empty_annotations.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Callable, Optional 16 | 17 | 18 | def mark_start_range( 19 | message: Optional[str] = None, 20 | color: Optional[str] = None, 21 | domain: Optional[str] = None, 22 | category: Optional[str] = None, 23 | ) -> None: 24 | pass 25 | 26 | 27 | def mark_end_range(range_id: str) -> None: 28 | pass 29 | 30 | 31 | def mark_annotate( 32 | message: Optional[str] = None, 33 | color: Optional[str] = None, 34 | domain: Optional[str] = None, 35 | category: Optional[str] = None, 36 | ) -> Callable: 37 | def decorator(func): 38 | return func 39 | 40 | return decorator 41 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | ParallelLlamaForCausalLM, 17 | # rmpad with megatron 18 | ParallelLlamaForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelLlamaForCausalLMRmPadPP, 21 | ParallelLlamaForValueRmPad, 22 | ParallelLlamaForValueRmPadPP, 23 | # original model with megatron 24 | ParallelLlamaModel, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelLlamaForCausalLM", 29 | "ParallelLlamaForCausalLMRmPad", 30 | "ParallelLlamaForCausalLMRmPadPP", 31 | "ParallelLlamaForValueRmPad", 32 | "ParallelLlamaForValueRmPadPP", 33 | "ParallelLlamaModel", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | ParallelQwen2ForCausalLM, 17 | # rmpad with megatron 18 | ParallelQwen2ForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelQwen2ForCausalLMRmPadPP, 21 | ParallelQwen2ForValueRmPad, 22 | ParallelQwen2ForValueRmPadPP, 23 | # original model with megatron 24 | ParallelQwen2Model, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelQwen2ForCausalLM", 29 | "ParallelQwen2ForCausalLMRmPad", 30 | "ParallelQwen2ForCausalLMRmPadPP", 31 | "ParallelQwen2ForValueRmPad", 32 | "ParallelQwen2ForValueRmPadPP", 33 | "ParallelQwen2Model", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_linear import ( 18 | LinearForLastLayer, 19 | MergedColumnParallelLinear, 20 | QKVParallelLinear, 21 | ) 22 | from .parallel_mlp import ParallelLlamaMLP 23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 24 | 25 | __all__ = [ 26 | "LinearForLastLayer", 27 | "MergedColumnParallelLinear", 28 | "QKVParallelLinear", 29 | "ParallelLlamaAttention", 30 | "ParallelLlamaDecoderLayer", 31 | "ParallelLlamaDecoderLayerRmPad", 32 | "ParallelLlamaMLP", 33 | "ParallelLlamaRMSNorm", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | -------------------------------------------------------------------------------- /search/retrieval/build_index_vllm_api.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # vLLM HTTP API 版本的索引构建脚本 4 | # 使用方法: 5 | # 1. 先启动 vLLM 服务器 6 | # 2. 再运行此脚本构建索引 7 | 8 | # 配置参数 9 | CORPUS_PATH="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl" # 替换为你的语料库路径 10 | SAVE_DIR="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki" 11 | RETRIEVAL_METHOD=qwen # 或者 bge, contriever 等 12 | BATCH_SIZE=64 # API 调用的批次大小 13 | VLLM_API_URL="http://localhost:8000" # vLLM 服务器地址 14 | #EMBEDDING_PATH="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18-e5/emb_e5.memmap" # 预计算的 embedding 文件路径 15 | corpus_file=/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl # jsonl 16 | # change faiss_type to HNSW32/64/128 for ANN indexing 17 | # change retriever_name to bm25 for BM25 indexing 18 | 19 | #"vllm serve /group/40077/shyuli/models/embedding/e5-base-v2 --task embed --host 0.0.0.0 --port 8000 --data-parallel-size 2" 20 | 21 | echo 22 | echo "开始构建索引..." 23 | 24 | # 运行索引构建 25 | python index_builder_api.py \ 26 | --retrieval_method $RETRIEVAL_METHOD \ 27 | --corpus_path $CORPUS_PATH \ 28 | --save_dir $SAVE_DIR \ 29 | --batch_size $BATCH_SIZE \ 30 | --vllm_api_url $VLLM_API_URL \ 31 | --max_length 256 \ 32 | --save_embedding \ 33 | --faiss_type "Flat" 34 | 35 | # --embedding_path $EMBEDDING_PATH \ 36 | echo "索引构建完成!" -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | import ray 18 | 19 | 20 | @ray.remote 21 | class WorkerGroupRegisterCenter: 22 | def __init__(self, rank_zero_info): 23 | self.rank_zero_info = rank_zero_info 24 | # rank -> node_id 25 | self.workers_info: Dict[int, str] = {} 26 | 27 | def get_rank_zero_info(self): 28 | return self.rank_zero_info 29 | 30 | def set_worker_info(self, rank, node_id) -> None: 31 | self.workers_info[rank] = node_id 32 | 33 | def get_worker_info(self) -> Dict[int, str]: 34 | return self.workers_info 35 | 36 | 37 | def create_worker_group_register_center(name, info): 38 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 39 | -------------------------------------------------------------------------------- /verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 19 | # 20 | # Licensed under the Apache License, Version 2.0 (the "License"); 21 | # you may not use this file except in compliance with the License. 22 | # You may obtain a copy of the License at 23 | # 24 | # http://www.apache.org/licenses/LICENSE-2.0 25 | # 26 | # Unless required by applicable law or agreed to in writing, software 27 | # distributed under the License is distributed on an "AS IS" BASIS, 28 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 | # See the License for the specific language governing permissions and 30 | # limitations under the License. 31 | 32 | -------------------------------------------------------------------------------- /verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..import_utils import is_nvtx_available 16 | from .performance import GPUMemoryLogger, log_gpu_memory_usage, simple_timer 17 | from .profile import DistProfilerExtension, ProfilerConfig 18 | 19 | if is_nvtx_available(): 20 | from .nvtx_profile import NsightSystemsProfiler as DistProfiler 21 | from .nvtx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer 22 | else: 23 | from .performance import marked_timer 24 | from .profile import DistProfiler, mark_annotate, mark_end_range, mark_start_range 25 | 26 | __all__ = [ 27 | "GPUMemoryLogger", 28 | "log_gpu_memory_usage", 29 | "mark_start_range", 30 | "mark_end_range", 31 | "mark_annotate", 32 | "DistProfiler", 33 | "DistProfilerExtension", 34 | "ProfilerConfig", 35 | "simple_timer", 36 | "marked_timer", 37 | ] 38 | -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r".*.*\\boxed\{.*\}.*", re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str, use_boxed: bool = True) -> float: 26 | if use_boxed: 27 | answer = extract_boxed_content(predict_str) 28 | else: 29 | answer = predict_str 30 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 31 | 32 | 33 | def compute_score(predict_str: str, ground_truth: str, use_boxed: bool = True, format_score: float = 0.1) -> float: 34 | return (1.0 - format_score) * acc_reward(predict_str, ground_truth, use_boxed) + format_score * format_reward( 35 | predict_str 36 | ) 37 | -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | 16 | import os 17 | 18 | import torch.distributed 19 | 20 | from verl.utils.device import get_nccl_backend, get_torch_device 21 | 22 | 23 | def initialize_global_process_group(timeout_second=36000): 24 | from datetime import timedelta 25 | 26 | torch.distributed.init_process_group( 27 | get_nccl_backend(), 28 | timeout=timedelta(seconds=timeout_second), 29 | init_method=os.environ.get("DIST_INIT_METHOD", None), 30 | ) 31 | local_rank = int(os.environ["LOCAL_RANK"]) 32 | rank = int(os.environ["RANK"]) 33 | world_size = int(os.environ["WORLD_SIZE"]) 34 | 35 | if torch.distributed.is_initialized(): 36 | get_torch_device().set_device(local_rank) 37 | return local_rank, rank, world_size 38 | 39 | 40 | def destroy_global_process_group(): 41 | if torch.distributed.is_initialized(): 42 | torch.distributed.destroy_process_group() 43 | -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | 17 | from verl.utils.device import get_device_id 18 | 19 | 20 | class MemoryBuffer: 21 | def __init__(self, numel, numel_padded, dtype): 22 | self.numel = numel 23 | self.numel_padded = numel_padded 24 | self.dtype = dtype 25 | self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=get_device_id(), requires_grad=False) 26 | 27 | def zero(self): 28 | """Reset the buffer to zero.""" 29 | self.data.zero_() 30 | 31 | def get(self, shape, start_index): 32 | """Return a tensor with the input `shape` as a view into the 33 | 1-D data starting at `start_index`.""" 34 | end_index = start_index + shape.numel() 35 | assert end_index <= self.numel, "requested tensor is out of the buffer range." 36 | buffer_tensor = self.data[start_index:end_index] 37 | buffer_tensor = buffer_tensor.view(shape) 38 | return buffer_tensor 39 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | try: 16 | from math_verify.errors import TimeoutException 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> bool: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | try: 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | except Exception: 35 | pass 36 | except TimeoutException: 37 | ret_score = timeout_score 38 | 39 | return ret_score 40 | -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from .vllm_rollout_spmd import vLLMAsyncRollout, vLLMRollout # noqa: F401 18 | 19 | 20 | def get_version(pkg): 21 | try: 22 | return version(pkg) 23 | except PackageNotFoundError: 24 | return None 25 | 26 | 27 | vllm_package_name = "vllm" 28 | vllm_package_version = get_version(vllm_package_name) 29 | if vllm_package_version is None: 30 | raise PackageNotFoundError( 31 | "To use vllm rollout, please ensure the 'vllm' package is properly installed. See " 32 | "https://verl.readthedocs.io/en/latest/start/install.html for more details" 33 | ) 34 | 35 | if "ROCM_PATH" in os.environ: 36 | import re 37 | 38 | match = re.match(r"(\d+\.\d+\.?\d*)", vllm_package_version) 39 | if match: 40 | vllm_package_version = match.group(1) 41 | else: 42 | raise ValueError(f"Warning: Could not parse version format: {vllm_package_version}") 43 | 44 | vllm_mode = "spmd" 45 | -------------------------------------------------------------------------------- /verl/workers/reward_manager/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __all__ = ["register", "get_reward_manager_cls"] 16 | 17 | REWARD_MANAGER_REGISTRY = {} 18 | 19 | 20 | def register(name): 21 | """Decorator to register a reward manager class with a given name. 22 | 23 | Args: 24 | name: `(str)` 25 | The name of the reward manager. 26 | """ 27 | 28 | def decorator(cls): 29 | if name in REWARD_MANAGER_REGISTRY and REWARD_MANAGER_REGISTRY[name] != cls: 30 | raise ValueError( 31 | f"Reward manager {name} has already been registered: {REWARD_MANAGER_REGISTRY[name]} vs {cls}" 32 | ) 33 | REWARD_MANAGER_REGISTRY[name] = cls 34 | return cls 35 | 36 | return decorator 37 | 38 | 39 | def get_reward_manager_cls(name): 40 | """Get the reward manager class with a given name. 41 | 42 | Args: 43 | name: `(str)` 44 | The name of the reward manager. 45 | 46 | Returns: 47 | `(type)`: The reward manager class. 48 | """ 49 | if name not in REWARD_MANAGER_REGISTRY: 50 | raise ValueError(f"Unknown reward manager: {name}") 51 | return REWARD_MANAGER_REGISTRY[name] 52 | -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | device: cuda 5 | 6 | data: 7 | path: ~/data/rlhf/math/test.parquet 8 | prompt_key: prompt 9 | n_samples: 5 10 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 11 | batch_size: 128 12 | 13 | model: 14 | path: ~/models/Qwen2-7B-Instruct 15 | external_lib: null 16 | rollout: 17 | name: vllm 18 | mode: sync # sync: LLM, async: AsyncLLM 19 | temperature: 1.0 20 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 21 | top_p: 0.7 22 | prompt_length: 1536 23 | response_length: 512 24 | # for vllm rollout 25 | dtype: bfloat16 # should align with FSDP 26 | gpu_memory_utilization: 0.5 27 | ignore_eos: False 28 | enforce_eager: True 29 | free_cache_engine: True 30 | load_format: dummy_dtensor 31 | tensor_model_parallel_size: 1 32 | max_num_batched_tokens: 8192 33 | max_model_len: null 34 | max_num_seqs: 1024 35 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 36 | log_prob_micro_batch_size_per_gpu: 8 37 | # for hf rollout 38 | do_sample: True 39 | disable_log_stats: True 40 | enable_chunked_prefill: True 41 | n: 1 42 | # support logging rollout prob for debugging purpose 43 | calculate_log_probs: False 44 | actor: 45 | strategy: fsdp # This is for backward-compatibility 46 | ulysses_sequence_parallel_size: 1 # sp size 47 | entropy_from_logits_with_chunking: False # calculate entropy with chunking to reduce memory peak 48 | entropy_checkpointing: False # recompute entropy 49 | fsdp_config: 50 | fsdp_size: -1 51 | forward_prefetch: False # FSDP1 forward_prefetch configuration 52 | 53 | ray_init: 54 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 55 | timeline_json_file: null 56 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common modelzoo such as huggingface/transformers stuggles when using Pytorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized with packed inputs in verl. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask. 15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Following the infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for reward model 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | from verl import DataProto 21 | 22 | 23 | class BasePPORewardModel(ABC): 24 | def __init__(self, config): 25 | self.config = config 26 | 27 | @abstractmethod 28 | def compute_reward(self, data: DataProto) -> DataProto: 29 | """Computing reward given input_ids. The transformers should output a tensor with shape 30 | [batch_size, sequence_length], and the value at [EOS] mask should be gathered. 31 | 32 | Args: 33 | data: must contain keys "input_ids", "attention_mask" and "position_ids". 34 | - input_ids: [batch_size, sequence_length] 35 | - attention_mask: [batch_size, sequence_length] 36 | - position_ids: [batch_size, sequence_length] 37 | 38 | Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward. 39 | Other position should have zero reward. Note that this may change in the future if we use 40 | dense reward. So, we leave the interface for general case. 41 | - reward: [batch_size, sequence_length]. 42 | 43 | """ 44 | pass 45 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import LlamaConfig 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelLlamaRMSNorm(nn.Module): 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import Qwen2Config 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelQwen2RMSNorm(nn.Module): 27 | def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig): 28 | """ 29 | Qwen2RMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/utils/metric/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Metrics utils. 16 | """ 17 | 18 | from typing import Any, Dict, List 19 | 20 | import numpy as np 21 | 22 | 23 | def reduce_metrics(metrics: Dict[str, List[Any]]) -> Dict[str, Any]: 24 | """ 25 | Reduces a dictionary of metric lists by computing the mean, max, or min of each list. 26 | The reduce operation is determined by the key name: 27 | - If the key contains "max", np.max is used 28 | - If the key contains "min", np.min is used 29 | - Otherwise, np.mean is used 30 | 31 | Args: 32 | metrics: A dictionary mapping metric names to lists of metric values. 33 | 34 | Returns: 35 | A dictionary with the same keys but with each list replaced by its reduced value. 36 | 37 | Example: 38 | >>> metrics = { 39 | ... "loss": [1.0, 2.0, 3.0], 40 | ... "accuracy": [0.8, 0.9, 0.7], 41 | ... "max_reward": [5.0, 8.0, 6.0], 42 | ... "min_error": [0.1, 0.05, 0.2] 43 | ... } 44 | >>> reduce_metrics(metrics) 45 | {"loss": 2.0, "accuracy": 0.8, "max_reward": 8.0, "min_error": 0.05} 46 | """ 47 | for key, val in metrics.items(): 48 | if "max" in key: 49 | metrics[key] = np.max(val) 50 | elif "min" in key: 51 | metrics[key] = np.min(val) 52 | else: 53 | metrics[key] = np.mean(val) 54 | return metrics 55 | -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from megatron.core import parallel_state as mpu 19 | 20 | 21 | def mark_parameter_as_sequence_parallel(parameter): 22 | parameter.sequence_parallel = True 23 | 24 | 25 | def is_sequence_parallel_param(param): 26 | return hasattr(param, "sequence_parallel") and param.sequence_parallel 27 | 28 | 29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor): 30 | """pad the tokens such that the total length is a multiple of sp world size 31 | 32 | Args: 33 | unpad_tokens: (total_nnz, ...). Tokens after removing padding 34 | 35 | Returns: 36 | the padded tokens: (total_nnz + pad_size,...) 37 | 38 | """ 39 | total_nnz = unpad_tokens.shape[0] 40 | sp_world_size = mpu.get_tensor_model_parallel_world_size() 41 | 42 | pad_size = 0 if total_nnz % sp_world_size == 0 else sp_world_size - total_nnz % sp_world_size 43 | 44 | if pad_size > 0: 45 | if unpad_tokens.ndim == 1: 46 | unpad_tokens = F.pad(unpad_tokens, (0, pad_size)) 47 | elif unpad_tokens.ndim == 2: 48 | unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size)) 49 | else: 50 | raise NotImplementedError(f"Padding dim {unpad_tokens.ndim()} is not supported") 51 | 52 | return unpad_tokens 53 | -------------------------------------------------------------------------------- /verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from packaging import version as vs 18 | 19 | from verl.utils.import_utils import is_sglang_available 20 | 21 | 22 | def get_version(pkg): 23 | try: 24 | return version(pkg) 25 | except PackageNotFoundError: 26 | return None 27 | 28 | 29 | package_name = "vllm" 30 | package_version = get_version(package_name) 31 | vllm_version = None 32 | 33 | if package_version is None: 34 | if not is_sglang_available(): 35 | raise ValueError( 36 | f"vllm version {package_version} not supported and SGLang also not Found. Currently supported " 37 | f"vllm versions are 0.7.0+" 38 | ) 39 | elif vs.parse(package_version) >= vs.parse("0.7.0"): 40 | vllm_version = package_version 41 | from vllm import LLM 42 | from vllm.distributed import parallel_state 43 | else: 44 | if vs.parse(package_version) in [vs.parse("0.5.4"), vs.parse("0.6.3")]: 45 | raise ValueError( 46 | f"vLLM version {package_version} support has been removed. vLLM 0.5.4 and 0.6.3 are no longer " 47 | f"supported. Please use vLLM 0.7.0 or later." 48 | ) 49 | if not is_sglang_available(): 50 | raise ValueError( 51 | f"vllm version {package_version} not supported and SGLang also not Found. Currently supported " 52 | f"vllm versions are 0.7.0+" 53 | ) 54 | 55 | __all__ = ["LLM", "parallel_state"] 56 | -------------------------------------------------------------------------------- /verl/tools/utils/mcp_clients/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import threading 17 | import time 18 | 19 | from mcp import Tool 20 | 21 | logger = logging.getLogger(__file__) 22 | 23 | 24 | class TokenBucket: 25 | def __init__(self, rate_limit: float): 26 | self.rate_limit = rate_limit # tokens per second 27 | self.tokens = rate_limit 28 | self.last_update = time.time() 29 | self.lock = threading.Lock() 30 | 31 | def acquire(self) -> bool: 32 | with self.lock: 33 | now = time.time() 34 | # Add new tokens based on time elapsed 35 | new_tokens = (now - self.last_update) * self.rate_limit 36 | self.tokens = min(self.rate_limit, self.tokens + new_tokens) 37 | self.last_update = now 38 | 39 | if self.tokens >= 1: 40 | self.tokens -= 1 41 | return True 42 | return False 43 | 44 | 45 | def mcp2openai(mcp_tool: Tool) -> dict: 46 | """Convert a MCP Tool to an OpenAI ChatCompletionTool.""" 47 | openai_format = { 48 | "type": "function", 49 | "function": { 50 | "name": mcp_tool.name, 51 | "description": mcp_tool.description, 52 | "parameters": mcp_tool.inputSchema, 53 | "strict": False, 54 | }, 55 | } 56 | if not openai_format["function"]["parameters"].get("required", None): 57 | openai_format["function"]["parameters"]["required"] = [] 58 | return openai_format 59 | -------------------------------------------------------------------------------- /verl/utils/profiler/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass, field 16 | 17 | from verl.base_config import BaseConfig 18 | 19 | 20 | @dataclass(frozen=True) 21 | class ProfilerConfig(BaseConfig): 22 | """Worker profiler config. Currently only support Nsight system profiler.""" 23 | 24 | # True for each task has its own database, False for all tasks in one training step share one database. 25 | discrete: bool = False 26 | 27 | # Whether to profile all ranks. 28 | all_ranks: bool = False 29 | 30 | # The ranks that will be profiled. [] or [0,1,...] 31 | ranks: list[int] = field(default_factory=list) 32 | 33 | def union(self, other: "ProfilerConfig") -> "ProfilerConfig": 34 | return ProfilerConfig( 35 | all_ranks=self.all_ranks or other.all_ranks, 36 | ranks=list(set(self.ranks or []) | set(other.ranks or [])), 37 | discrete=self.discrete or other.discrete, 38 | ) 39 | 40 | def intersect(self, other: "ProfilerConfig") -> "ProfilerConfig": 41 | return ProfilerConfig( 42 | all_ranks=self.all_ranks and other.all_ranks, 43 | ranks=list(set(self.ranks or []) & set(other.ranks or [])), 44 | discrete=self.discrete and other.discrete, 45 | ) 46 | 47 | def __post_init__(self) -> None: 48 | """config validation logics go here""" 49 | assert isinstance(self.ranks, (set, list, tuple)), ( 50 | f"Profiler ranks must be of type list, got {type(self.ranks)}" 51 | ) 52 | -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | # Single-turn settings 8 | prompt_key: question 9 | response_key: answer 10 | prompt_dict_keys: null 11 | response_dict_keys: null 12 | # Multi-turn settings 13 | multiturn: 14 | enable: false # Set to true to use multi-turn dataset 15 | messages_key: messages # Key for messages list in multi-turn mode 16 | tools_key: tools # Key for tools list in multi-turn mode 17 | enable_thinking_key: enable_thinking # Whether to enable thinking in multi-turn mode 18 | max_length: 1024 19 | truncation: error 20 | balance_dp_token: False 21 | chat_template: null 22 | custom_cls: 23 | path: null 24 | name: null 25 | use_shm: False 26 | model: 27 | partial_pretrain: ~/models/gemma-1.1-7b-it 28 | use_shm: False 29 | fsdp_config: 30 | model_dtype: fp32 31 | wrap_policy: 32 | min_num_params: 0 33 | cpu_offload: False 34 | offload_params: False 35 | external_lib: null 36 | enable_gradient_checkpointing: True 37 | trust_remote_code: False 38 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 39 | lora_alpha: 16 # LoRA scaling factor 40 | target_modules: all-linear # Target modules for LoRA adaptation 41 | use_liger: False 42 | strategy: fsdp2 43 | optim: 44 | lr: 1e-5 45 | betas: [0.9, 0.95] 46 | weight_decay: 0.01 47 | warmup_steps_ratio: 0.1 48 | clip_grad: 1.0 49 | lr_scheduler: cosine 50 | ulysses_sequence_parallel_size: 1 51 | use_remove_padding: False 52 | trainer: 53 | default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name} 54 | default_hdfs_dir: null 55 | resume_path: null 56 | project_name: gsm8k-sft 57 | experiment_name: test 58 | total_epochs: 4 59 | total_training_steps: null 60 | logger: [ 'console', 'wandb' ] 61 | seed: 1 62 | 63 | save_freq: -1 64 | test_freq: -1 65 | nnodes: 1 66 | n_gpus_per_node: 8 67 | max_ckpt_to_keep: null # TODO 68 | -------------------------------------------------------------------------------- /verl/models/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib 16 | from typing import List, Optional, Type 17 | 18 | import torch.nn as nn 19 | 20 | # Supported models in Megatron-LM 21 | # Architecture -> (module, class). 22 | _MODELS = { 23 | "LlamaForCausalLM": ( 24 | "llama", 25 | ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad"), 26 | ), 27 | "Qwen2ForCausalLM": ( 28 | "qwen2", 29 | ("ParallelQwen2ForCausalLMRmPadPP", "ParallelQwen2ForValueRmPadPP", "ParallelQwen2ForCausalLMRmPad"), 30 | ), 31 | "MistralForCausalLM": ( 32 | "mistral", 33 | ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", "ParallelMistralForCausalLMRmPad"), 34 | ), 35 | } 36 | 37 | 38 | # return model class 39 | class ModelRegistry: 40 | @staticmethod 41 | def load_model_cls(model_arch: str, value=False) -> Optional[Type[nn.Module]]: 42 | if model_arch not in _MODELS: 43 | return None 44 | 45 | megatron = "megatron" 46 | 47 | module_name, model_cls_name = _MODELS[model_arch] 48 | if not value: # actor/ref 49 | model_cls_name = model_cls_name[0] 50 | elif value: # critic/rm 51 | model_cls_name = model_cls_name[1] 52 | 53 | module = importlib.import_module(f"verl.models.{module_name}.{megatron}.modeling_{module_name}_megatron") 54 | return getattr(module, model_cls_name, None) 55 | 56 | @staticmethod 57 | def get_supported_archs() -> List[str]: 58 | return list(_MODELS.keys()) 59 | -------------------------------------------------------------------------------- /verl/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | import ipaddress 28 | 29 | 30 | def is_ipv4(ip_str: str) -> bool: 31 | """ 32 | Check if the given string is an IPv4 address 33 | 34 | Args: 35 | ip_str: The IP address string to check 36 | 37 | Returns: 38 | bool: Returns True if it's an IPv4 address, False otherwise 39 | """ 40 | try: 41 | ipaddress.IPv4Address(ip_str) 42 | return True 43 | except ipaddress.AddressValueError: 44 | return False 45 | 46 | 47 | def is_ipv6(ip_str: str) -> bool: 48 | """ 49 | Check if the given string is an IPv6 address 50 | 51 | Args: 52 | ip_str: The IP address string to check 53 | 54 | Returns: 55 | bool: Returns True if it's an IPv6 address, False otherwise 56 | """ 57 | try: 58 | ipaddress.IPv6Address(ip_str) 59 | return True 60 | except ipaddress.AddressValueError: 61 | return False 62 | -------------------------------------------------------------------------------- /verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Copyright 2025 The Qwen Team and The HuggingFace Inc. team 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from typing import Tuple 18 | 19 | import torch 20 | import torch_npu 21 | from torch_npu import npu_rotary_mul as apply_rotary_emb 22 | from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl 23 | from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2RMSNorm 24 | 25 | 26 | # This patch takes effect when using apply_rotary_pos_emb_flashatt on qwen2_5_vl and will be removed in 27 | # subsequent versions 28 | # https://github.com/huggingface/transformers/pull/38491 29 | def apply_rotary_pos_emb_flashatt_npu( 30 | q: torch.Tensor, k: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor 31 | ) -> Tuple[torch.Tensor, torch.Tensor]: 32 | cos = cos.chunk(2, dim=-1)[0].contiguous() 33 | sin = sin.chunk(2, dim=-1)[0].contiguous() 34 | cos = cos.repeat(1, 2) 35 | sin = sin.repeat(1, 2) 36 | q_embed = apply_rotary_emb( 37 | q.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float() 38 | ).type_as(q) 39 | k_embed = apply_rotary_emb( 40 | k.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float() 41 | ).type_as(k) 42 | return q_embed, k_embed 43 | 44 | 45 | # This api can improve performance on ASCEND NPU 46 | def rms_norm_forward(self, x): 47 | return torch_npu.npu_rms_norm(x, self.weight, epsilon=self.variance_epsilon)[0] 48 | 49 | 50 | Qwen2RMSNorm.forward = rms_norm_forward 51 | modeling_qwen2_5_vl.apply_rotary_pos_emb_flashatt = apply_rotary_pos_emb_flashatt_npu 52 | -------------------------------------------------------------------------------- /verl/model_merger/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | This module is used to merge huggingface model and test verl checkpoints from FSDP and Megatron backends. 17 | 18 | To merge FSDP checkpoints: 19 | ```sh 20 | python -m verl.model_merger merge \ 21 | --backend fsdp \ 22 | --local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \ 23 | --target_dir /path/to/merged_hf_model 24 | ``` 25 | 26 | To merge Megatron checkpoints: 27 | ```sh 28 | python -m verl.model_merger merge \ 29 | --backend megatron \ 30 | --tie-word-embedding \ 31 | --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \ 32 | --target_dir /path/to/merged_hf_model 33 | ``` 34 | 35 | For more details, please refer to documentation: 36 | https://verl.readthedocs.io/en/latest/advance/checkpoint.html#convert-fsdp-and-megatron-checkpoints-to-huggingface-format-model 37 | """ 38 | 39 | from .base_model_merger import generate_config_from_args, parse_args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | config = generate_config_from_args(args) 45 | print(f"config: {config}") 46 | 47 | if config.backend == "fsdp": 48 | from .fsdp_model_merger import FSDPModelMerger 49 | 50 | merger = FSDPModelMerger(config) 51 | elif config.backend == "megatron": 52 | from .megatron_model_merger import MegatronModelMerger 53 | 54 | merger = MegatronModelMerger(config) 55 | else: 56 | raise NotImplementedError(f"Unknown backend: {config.backend}") 57 | 58 | merger.merge_and_save() 59 | merger.cleanup() 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from megatron.core import dist_checkpointing, mpu 16 | from megatron.core.dist_checkpointing.serialization import ( 17 | get_default_load_sharded_strategy, 18 | get_default_save_sharded_strategy, 19 | ) 20 | from megatron.core.dist_checkpointing.strategies.fully_parallel import ( 21 | FullyParallelLoadStrategyWrapper, 22 | FullyParallelSaveStrategyWrapper, 23 | ) 24 | 25 | 26 | def save_dist_checkpointing(sharded_state_dict, ckpt_path, async_save=False): 27 | validate_sharding_integrity = True 28 | # Get checkpointing strategies 29 | save_strategy = get_default_save_sharded_strategy("torch_dist") 30 | save_strategy = FullyParallelSaveStrategyWrapper( 31 | save_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 32 | ) 33 | 34 | # Save model sharded state dicts 35 | async_save_request = dist_checkpointing.save( 36 | sharded_state_dict, 37 | ckpt_path, 38 | sharded_strategy=save_strategy, 39 | async_sharded_save=async_save, 40 | validate_access_integrity=validate_sharding_integrity, 41 | ) 42 | 43 | return async_save_request 44 | 45 | 46 | def load_dist_checkpointing(sharded_state_dict, ckpt_dir): 47 | # Get checkpointing strategies 48 | load_strategy = get_default_load_sharded_strategy(ckpt_dir) 49 | load_strategy = FullyParallelLoadStrategyWrapper( 50 | load_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 51 | ) 52 | 53 | # Load model sharded state dicts 54 | state_dict = dist_checkpointing.load(sharded_state_dict, ckpt_dir, sharded_strategy=load_strategy) 55 | 56 | return state_dict 57 | -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for Actor 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | from typing import Dict 20 | 21 | import torch 22 | 23 | from verl import DataProto 24 | 25 | __all__ = ["BasePPOActor"] 26 | 27 | 28 | class BasePPOActor(ABC): 29 | def __init__(self, config): 30 | """The base class for PPO actor 31 | 32 | Args: 33 | config (DictConfig): a config passed to the PPOActor. We expect the type to be 34 | DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general. 35 | """ 36 | super().__init__() 37 | self.config = config 38 | 39 | @abstractmethod 40 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 41 | """Compute logits given a batch of data. 42 | 43 | Args: 44 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 45 | ```attention_mask``` and ```position_ids```. 46 | 47 | Returns: 48 | DataProto: a DataProto containing the key ```log_probs``` 49 | 50 | 51 | """ 52 | pass 53 | 54 | @abstractmethod 55 | def update_policy(self, data: DataProto) -> Dict: 56 | """Update the policy with an iterator of DataProto 57 | 58 | Args: 59 | data (DataProto): an iterator over the DataProto that returns by 60 | ```make_minibatch_iterator``` 61 | 62 | Returns: 63 | Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model 64 | such as ```loss```, ```grad_norm```, etc,. 65 | 66 | """ 67 | pass 68 | -------------------------------------------------------------------------------- /search/retrieval/read_npz_simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Simple NPZ file reader 4 | 5 | Used to read a single NPZ file and view its contents 6 | """ 7 | 8 | import numpy as np 9 | import argparse 10 | 11 | 12 | def read_npz_file(npz_path): 13 | """ 14 | Read NPZ file and display information 15 | 16 | Args: 17 | npz_path: NPZ file path 18 | """ 19 | print(f"Reading file: {npz_path}") 20 | 21 | data = np.load(npz_path) 22 | 23 | print(f"Keys in file: {list(data.keys())}") 24 | 25 | if "embeddings" in data: 26 | embeddings = data["embeddings"] 27 | 28 | print(f"\nEmbeddings info:") 29 | print(f" Shape: {embeddings.shape}") 30 | print(f" Data type: {embeddings.dtype}") 31 | print(f" Memory usage: {embeddings.nbytes / (1024*1024):.1f} MB") 32 | 33 | if len(embeddings) > 0: 34 | norms = np.linalg.norm(embeddings, axis=1) 35 | means = np.mean(embeddings, axis=1) 36 | 37 | print(f" L2 norm - mean: {np.mean(norms):.6f}, std: {np.std(norms):.6f}") 38 | print(f" L2 norm - min: {np.min(norms):.6f}, max: {np.max(norms):.6f}") 39 | print(f" Vector mean - mean: {np.mean(means):.6f}, std: {np.std(means):.6f}") 40 | 41 | n_show = min(3, len(embeddings)) 42 | print(f"\nFirst {n_show} vectors:") 43 | for i in range(n_show): 44 | print(f" Vector {i}: norm={np.linalg.norm(embeddings[i]):.6f}, first 5 values={embeddings[i][:5]}") 45 | else: 46 | print("'embeddings' key not found in file") 47 | 48 | for key in data.keys(): 49 | arr = data[key] 50 | print(f"\nKey '{key}':") 51 | print(f" Shape: {arr.shape}") 52 | print(f" Data type: {arr.dtype}") 53 | if arr.size <= 10: 54 | print(f" Values: {arr}") 55 | else: 56 | print(f" First 10 values: {arr.flat[:10]}") 57 | 58 | 59 | def main(): 60 | parser = argparse.ArgumentParser(description="Read NPZ file") 61 | parser.add_argument("npz_file", help="NPZ file path") 62 | 63 | args = parser.parse_args() 64 | 65 | try: 66 | read_npz_file(args.npz_file) 67 | except Exception as e: 68 | print(f"Error: {e}") 69 | return 1 70 | 71 | return 0 72 | 73 | 74 | if __name__ == "__main__": 75 | exit(main()) 76 | -------------------------------------------------------------------------------- /verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import logging 15 | import os 16 | from typing import Any, Dict, List 17 | from uuid import uuid4 18 | 19 | from verl.experimental.agent_loop.agent_loop import AgentLoopBase, AgentLoopOutput 20 | from verl.utils.profiler import simple_timer 21 | 22 | logger = logging.getLogger(__file__) 23 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) 24 | 25 | 26 | class SingleTurnAgentLoop(AgentLoopBase): 27 | """Naive agent loop that only do single turn chat completion.""" 28 | 29 | def __init__(self, config, server_manager, tokenizer): 30 | super().__init__(config, server_manager, tokenizer) 31 | self.prompt_length = config.actor_rollout_ref.rollout.prompt_length 32 | self.response_length = config.actor_rollout_ref.rollout.response_length 33 | 34 | async def run(self, messages: List[Dict[str, Any]], sampling_params: Dict[str, Any]) -> AgentLoopOutput: 35 | metrics = {} 36 | request_id = uuid4().hex 37 | prompt_ids = await self.loop.run_in_executor( 38 | None, lambda: self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True) 39 | ) 40 | 41 | with simple_timer("generate_sequences", metrics): 42 | response_ids = await self.server_manager.generate( 43 | request_id=request_id, prompt_ids=prompt_ids, sampling_params=sampling_params 44 | ) 45 | response_mask = [1] * len(response_ids) 46 | 47 | output = AgentLoopOutput( 48 | prompt_ids=prompt_ids, 49 | response_ids=response_ids[: self.response_length], 50 | response_mask=response_mask[: self.response_length], 51 | num_turns=2, 52 | metrics=metrics, 53 | ) 54 | return output 55 | -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Borrowed from: https://huggingface.co/spaces/codeparrot/apps_metric/blob/main/utils.py 16 | 17 | import multiprocessing 18 | import os 19 | import sys 20 | import traceback 21 | from typing import Optional 22 | 23 | from .testing_util import run_test 24 | 25 | 26 | def _temp_run(sample, generation, debug, result, metadata_list, timeout): 27 | with open(os.devnull, "w") as devnull: 28 | sys.stdout = devnull 29 | sys.stderr = devnull 30 | try: 31 | res, metadata = run_test(in_outs=sample, test=generation, debug=debug, timeout=timeout) 32 | result.append(res) 33 | metadata_list.append(metadata) 34 | except Exception: 35 | # print(e) # some tracebacks are extremely long. 36 | traceback.print_exc(10) 37 | result.append([-1 for i in range(len(sample["inputs"]))]) 38 | metadata_list.append({}) 39 | 40 | 41 | def check_correctness(in_outs: Optional[dict], generation, timeout=10, debug=True): 42 | """Check correctness of code generation with a global timeout. 43 | The global timeout is to catch some extreme/rare cases not handled by the timeouts 44 | inside `run_test`""" 45 | 46 | manager = multiprocessing.Manager() 47 | result = manager.list() 48 | metadata_list = manager.list() 49 | p = multiprocessing.Process(target=_temp_run, args=(in_outs, generation, debug, result, metadata_list, timeout)) 50 | p.start() 51 | p.join(timeout=timeout + 1) 52 | if p.is_alive(): 53 | p.kill() 54 | # p.terminate() 55 | if not result: 56 | # consider that all tests failed 57 | result = [[-1 for i in range(len(in_outs["inputs"]))]] 58 | if debug: 59 | print("global timeout") 60 | return result[0], metadata_list 61 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from verl.single_controller.base import ResourcePool, WorkerGroup 18 | 19 | from .worker import DistGlobalInfo, DistRankInfo 20 | 21 | 22 | class MegatronWorkerGroup(WorkerGroup): 23 | def __init__(self, resource_pool: ResourcePool, **kwargs): 24 | super().__init__(resource_pool=resource_pool, **kwargs) 25 | self._megatron_rank_info = None 26 | self._megatron_global_info: DistGlobalInfo = None 27 | 28 | def init_megatron(self, default_megatron_kwargs: Dict = None): 29 | raise NotImplementedError("MegatronWorkerGroup.init_megatron should be overwritten") 30 | 31 | def get_megatron_rank_info(self, rank: int) -> DistRankInfo: 32 | assert 0 <= rank < self.world_size, f"rank must be from [0, world_size), Got {rank}" 33 | return self._megatron_rank_info[rank] 34 | 35 | @property 36 | def tp_size(self): 37 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 38 | return self._megatron_global_info.tp_size 39 | 40 | @property 41 | def dp_size(self): 42 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 43 | return self._megatron_global_info.dp_size 44 | 45 | @property 46 | def pp_size(self): 47 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 48 | return self._megatron_global_info.pp_size 49 | 50 | @property 51 | def cp_size(self): 52 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 53 | return self._megatron_global_info.cp_size 54 | 55 | def get_megatron_global_info(self): 56 | return self._megatron_global_info 57 | -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | import pkg_resources 19 | from packaging.version import parse as parse_version 20 | from pkg_resources import DistributionNotFound 21 | 22 | from .protocol import DataProto 23 | from .utils.device import is_npu_available 24 | from .utils.logging_utils import set_basic_config 25 | 26 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 27 | 28 | with open(os.path.join(version_folder, "version/version")) as f: 29 | __version__ = f.read().strip() 30 | 31 | 32 | set_basic_config(level=logging.WARNING) 33 | 34 | 35 | __all__ = ["DataProto", "__version__"] 36 | 37 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true": 38 | import importlib 39 | 40 | if importlib.util.find_spec("modelscope") is None: 41 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`") 42 | # Patch hub to download models from modelscope to speed up. 43 | from modelscope.utils.hf_util import patch_hub 44 | 45 | patch_hub() 46 | 47 | if is_npu_available: 48 | from .models.transformers import npu_patch as npu_patch 49 | 50 | package_name = "transformers" 51 | required_version_spec = "4.52.4" 52 | try: 53 | installed_version = pkg_resources.get_distribution(package_name).version 54 | installed = parse_version(installed_version) 55 | required = parse_version(required_version_spec) 56 | 57 | if not installed >= required: 58 | raise ValueError( 59 | f"{package_name} version >= {required_version_spec} is required on ASCEND NPU, current version is " 60 | f"{installed}." 61 | ) 62 | except DistributionNotFound as e: 63 | raise ImportError( 64 | f"package {package_name} is not installed, please run pip install {package_name}=={required_version_spec}" 65 | ) from e 66 | -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import is_dataclass 16 | from typing import Any, Dict, Optional, Type, Union 17 | 18 | from omegaconf import DictConfig, OmegaConf 19 | 20 | __all__ = ["omega_conf_to_dataclass"] 21 | 22 | 23 | def omega_conf_to_dataclass(config: Union[DictConfig, dict], dataclass_type: Optional[Type[Any]] = None) -> Any: 24 | """ 25 | Convert an OmegaConf DictConfig to a dataclass. 26 | 27 | Args: 28 | config: The OmegaConf DictConfig or dict to convert. 29 | dataclass_type: The dataclass type to convert to. When dataclass_type is None, 30 | the DictConfig must contain _target_ to be instantiated via hydra.instantiate API. 31 | 32 | Returns: 33 | The dataclass instance. 34 | """ 35 | if dataclass_type is not None and isinstance(config, dataclass_type): 36 | return config 37 | 38 | if dataclass_type is None: 39 | assert "_target_" in config, ( 40 | "When dataclass_type is not provided, config must contain _target_." 41 | "See trainer/config/ppo_trainer.yaml algorithm section for an example." 42 | ) 43 | from hydra.utils import instantiate 44 | 45 | return instantiate(config, _convert_="partial") 46 | 47 | if not is_dataclass(dataclass_type): 48 | raise ValueError(f"{dataclass_type} must be a dataclass") 49 | cfg = OmegaConf.create(config) # in case it's a dict 50 | cfg_from_dataclass = OmegaConf.structured(dataclass_type) 51 | # let cfg override the existing vals in `cfg_from_dataclass` 52 | cfg_merged = OmegaConf.merge(cfg_from_dataclass, cfg) 53 | # now convert to `dataclass_type` 54 | config_object = OmegaConf.to_object(cfg_merged) 55 | return config_object 56 | 57 | 58 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 59 | for key in dictionary: 60 | if hasattr(config, key): 61 | dictionary[key] = getattr(config, key) 62 | -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Adapted from Cruise. 16 | """ 17 | 18 | from typing import Union 19 | 20 | import torch 21 | 22 | HALF_LIST = [16, "16", "fp16", "float16", torch.float16] 23 | FLOAT_LIST = [32, "32", "fp32", "float32", torch.float32] 24 | BFLOAT_LIST = ["bf16", "bfloat16", torch.bfloat16] 25 | 26 | 27 | class PrecisionType: 28 | """Type of precision used. 29 | 30 | >>> PrecisionType.HALF == 16 31 | True 32 | >>> PrecisionType.HALF in (16, "16") 33 | True 34 | """ 35 | 36 | HALF = "16" 37 | FLOAT = "32" 38 | FULL = "64" 39 | BFLOAT = "bf16" 40 | MIXED = "mixed" 41 | 42 | @staticmethod 43 | def supported_type(precision: Union[str, int]) -> bool: 44 | return any(x == precision for x in PrecisionType) 45 | 46 | @staticmethod 47 | def supported_types() -> list[str]: 48 | return [x.value for x in PrecisionType] 49 | 50 | @staticmethod 51 | def is_fp16(precision): 52 | return precision in HALF_LIST 53 | 54 | @staticmethod 55 | def is_fp32(precision): 56 | return precision in FLOAT_LIST 57 | 58 | @staticmethod 59 | def is_bf16(precision): 60 | return precision in BFLOAT_LIST 61 | 62 | @staticmethod 63 | def to_dtype(precision): 64 | if precision in HALF_LIST: 65 | return torch.float16 66 | elif precision in FLOAT_LIST: 67 | return torch.float32 68 | elif precision in BFLOAT_LIST: 69 | return torch.bfloat16 70 | else: 71 | raise RuntimeError(f"unexpected precision: {precision}") 72 | 73 | @staticmethod 74 | def to_str(precision): 75 | if precision == torch.float16: 76 | return "fp16" 77 | elif precision == torch.float32: 78 | return "fp32" 79 | elif precision == torch.bfloat16: 80 | return "bf16" 81 | else: 82 | raise RuntimeError(f"unexpected precision: {precision}") 83 | -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def get_weight_loader(arch: str): 17 | from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel 18 | 19 | _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = { 20 | "LlamaForCausalLM": load_state_dict_to_megatron_gptmodel, 21 | "Qwen2ForCausalLM": load_state_dict_to_megatron_gptmodel, 22 | } 23 | 24 | if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY: 25 | return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch] 26 | raise ValueError( 27 | f"Model architectures {arch} loader are not supported for now. Supported architectures: " 28 | f"{_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}" 29 | ) 30 | 31 | 32 | def get_weight_saver(arch: str): 33 | from verl.models.mcore.saver import ( 34 | merge_megatron_ckpt_gptmodel, 35 | merge_megatron_ckpt_gptmodel_dpskv3, 36 | merge_megatron_ckpt_gptmodel_mixtral, 37 | merge_megatron_ckpt_gptmodel_qwen2_5_vl, 38 | merge_megatron_ckpt_gptmodel_qwen_moe, 39 | ) 40 | 41 | _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY = { 42 | "LlamaForCausalLM": merge_megatron_ckpt_gptmodel, 43 | "Qwen2ForCausalLM": merge_megatron_ckpt_gptmodel, 44 | "MixtralForCausalLM": merge_megatron_ckpt_gptmodel_mixtral, 45 | "Qwen2MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 46 | "Qwen2_5_VLForConditionalGeneration": merge_megatron_ckpt_gptmodel_qwen2_5_vl, 47 | "DeepseekV3ForCausalLM": merge_megatron_ckpt_gptmodel_dpskv3, 48 | "Qwen3ForCausalLM": merge_megatron_ckpt_gptmodel, 49 | "Qwen3MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 50 | } 51 | if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY: 52 | return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch] 53 | raise ValueError( 54 | f"Model architectures {arch} saver are not supported for now. Supported architectures: " 55 | f"{_MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY.keys()}" 56 | ) 57 | -------------------------------------------------------------------------------- /verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | 18 | def extract_solution(solution_str, method="strict"): 19 | assert method in ["strict", "flexible"] 20 | 21 | if method == "strict": 22 | # this also tests the formatting of the model 23 | solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str) 24 | if len(solutions) == 0: 25 | final_answer = None 26 | else: 27 | # take the last solution 28 | final_answer = solutions[-1].replace(",", "").replace("$", "") 29 | elif method == "flexible": 30 | answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str) 31 | final_answer = None 32 | if len(answer) == 0: 33 | # no reward is there is no answer 34 | pass 35 | else: 36 | invalid_str = ["", "."] 37 | # find the last number that is not '.' 38 | for final_answer in reversed(answer): 39 | if final_answer not in invalid_str: 40 | break 41 | return final_answer 42 | 43 | 44 | def compute_score(solution_str, ground_truth, method="strict", format_score=0.0, score=1.0): 45 | """The scoring function for GSM8k. 46 | 47 | Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual 48 | Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024. 49 | 50 | Args: 51 | solution_str: the solution text 52 | ground_truth: the ground truth 53 | method: the method to extract the solution, choices are 'strict' and 'flexible' 54 | format_score: the score for the format 55 | score: the score for the correct answer 56 | """ 57 | answer = extract_solution(solution_str=solution_str, method=method) 58 | if answer is None: 59 | return 0 60 | else: 61 | if answer == ground_truth: 62 | return score 63 | else: 64 | return format_score 65 | -------------------------------------------------------------------------------- /utils/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | USE_MEGATRON=${USE_MEGATRON:-1} 4 | USE_SGLANG=${USE_SGLANG:-1} 5 | 6 | export MAX_JOBS=32 7 | 8 | echo "1. install inference frameworks and pytorch they need" 9 | if [ $USE_SGLANG -eq 1 ]; then 10 | pip install "sglang[all]==0.4.6.post1" --no-cache-dir && pip install torch-memory-saver --no-cache-dir #--find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python 11 | fi 12 | pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata 13 | 14 | echo "2. install basic packages" 15 | pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \ 16 | "numpy<2.0.0" "pyarrow>=15.0.0" pandas \ 17 | ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \ 18 | pytest py-spy pyext pre-commit ruff 19 | 20 | pip install "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1" 21 | 22 | 23 | echo "3. install FlashAttention and FlashInfer" 24 | # Install flash-attn-2.7.4.post1 (cxx11abi=False) 25 | wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \ 26 | pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl 27 | 28 | # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False) 29 | # vllm-0.8.3 does not support flashinfer>=0.2.3 30 | # see https://github.com/vllm-project/vllm/pull/15777 31 | wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \ 32 | pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl 33 | 34 | 35 | if [ $USE_MEGATRON -eq 1 ]; then 36 | echo "4. install TransformerEngine and Megatron" 37 | echo "Notice that TransformerEngine installation can take very long time, please be patient" 38 | NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.2 39 | pip3 install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.0rc3 40 | fi 41 | 42 | 43 | echo "5. May need to fix opencv" 44 | pip install opencv-python 45 | pip install opencv-fixer && \ 46 | python -c "from opencv_fixer import AutoFix; AutoFix()" 47 | 48 | 49 | if [ $USE_MEGATRON -eq 1 ]; then 50 | echo "6. Install cudnn python package (avoid being overridden)" 51 | pip install nvidia-cudnn-cu12==9.8.0.87 52 | fi 53 | 54 | echo "Successfully installed all packages" 55 | -------------------------------------------------------------------------------- /verl/utils/device.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # This code is inspired by the torchtune. 4 | # https://github.com/pytorch/torchtune/blob/main/torchtune/utils/_device.py 5 | # 6 | # Copyright (c) Meta Platforms, Inc. and affiliates. 7 | # All rights reserved. 8 | # 9 | # This source code is licensed under the BSD-style license in https://github.com/pytorch/torchtune/blob/main/LICENSE 10 | 11 | import logging 12 | 13 | import torch 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def is_torch_npu_available() -> bool: 19 | """Check the availability of NPU""" 20 | try: 21 | import torch_npu # noqa: F401 22 | 23 | return torch.npu.is_available() 24 | except ImportError: 25 | return False 26 | 27 | 28 | is_cuda_available = torch.cuda.is_available() 29 | is_npu_available = is_torch_npu_available() 30 | 31 | 32 | def get_visible_devices_keyword() -> str: 33 | """Function that gets visible devices keyword name. 34 | Returns: 35 | 'CUDA_VISIBLE_DEVICES' or `ASCEND_RT_VISIBLE_DEVICES` 36 | """ 37 | return "CUDA_VISIBLE_DEVICES" if is_cuda_available else "ASCEND_RT_VISIBLE_DEVICES" 38 | 39 | 40 | def get_device_name() -> str: 41 | """Function that gets the torch.device based on the current machine. 42 | This currently only supports CPU, CUDA, NPU. 43 | Returns: 44 | device 45 | """ 46 | if is_cuda_available: 47 | device = "cuda" 48 | elif is_npu_available: 49 | device = "npu" 50 | else: 51 | device = "cpu" 52 | return device 53 | 54 | 55 | def get_torch_device() -> any: 56 | """Return the corresponding torch attribute based on the device type string. 57 | Returns: 58 | module: The corresponding torch device namespace, or torch.cuda if not found. 59 | """ 60 | device_name = get_device_name() 61 | try: 62 | return getattr(torch, device_name) 63 | except AttributeError: 64 | logger.warning(f"Device namespace '{device_name}' not found in torch, try to load torch.cuda.") 65 | return torch.cuda 66 | 67 | 68 | def get_device_id() -> int: 69 | """Return current device id based on the device type. 70 | Returns: 71 | device index 72 | """ 73 | return get_torch_device().current_device() 74 | 75 | 76 | def get_nccl_backend() -> str: 77 | """Return nccl backend type based on the device type. 78 | Returns: 79 | nccl backend type string. 80 | """ 81 | if is_cuda_available: 82 | return "nccl" 83 | elif is_npu_available: 84 | return "hccl" 85 | else: 86 | raise RuntimeError(f"No available nccl backend found on device type {get_device_name()}.") 87 | -------------------------------------------------------------------------------- /verl/tools/mcp_search_tool.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import logging 17 | import os 18 | import re 19 | from typing import Tuple 20 | 21 | from verl.tools.mcp_base_tool import MCPBaseTool 22 | 23 | from .schemas import OpenAIFunctionToolSchema 24 | 25 | logger = logging.getLogger(__name__) 26 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) 27 | 28 | 29 | class MCPSearchTool(MCPBaseTool): 30 | def __init__(self, config: dict, tool_schema: OpenAIFunctionToolSchema): 31 | super().__init__(config, tool_schema) 32 | 33 | def _parse_tool_result(self, content: list) -> Tuple[str, dict]: 34 | res = "" 35 | res_cnt = 0 36 | query_list = [] 37 | metadata = { 38 | "api_request_error": "", 39 | "status": "unknown", 40 | "total_results": 0, 41 | } 42 | try: 43 | for part in content: 44 | if part.type != "text": 45 | continue 46 | text = part.text.replace("'", '"') 47 | query_match = re.search(r'query"\s*:\s*"([^"]+)"', text) 48 | query = query_match.group(1) if query_match else "" 49 | query_list.append(query) 50 | 51 | title_matches = re.findall(r'"title"\s*:', text) 52 | title_count = len(title_matches) 53 | 54 | results_match = re.search(r'"results"\s*:\s*(\[.*?\])', text, re.DOTALL) 55 | results_content = results_match.group(1) if results_match else "" 56 | 57 | res += results_content 58 | res_cnt += title_count 59 | except json.JSONDecodeError: 60 | err_msg = "json parse error." 61 | logger.error(err_msg) 62 | metadata["api_request_error"] = err_msg 63 | metadata["status"] = "error" 64 | 65 | # update metadata 66 | metadata["status"] = "success" 67 | metadata["queries"] = query_list 68 | metadata["query_count"] = len(query_list) 69 | metadata["total_results"] = res_cnt 70 | return res, metadata 71 | -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import pickle 17 | from typing import Any, List, Optional 18 | 19 | import numpy as np 20 | import torch 21 | import torch.distributed as dist 22 | 23 | from verl.utils.device import get_device_name 24 | 25 | 26 | def broadcast_pyobj( 27 | data: List[Any], 28 | rank: int, 29 | dist_group: Optional[torch.distributed.ProcessGroup] = None, 30 | src: int = 0, 31 | force_cpu_device: bool = False, 32 | ): 33 | """from https://github.com/sgl-project/sglang/blob/844e2f227ab0cce6ef818a719170ce37b9eb1e1b/python/sglang/srt/utils.py#L905 34 | 35 | Broadcast inputs from src rank to all other ranks with torch.dist backend. 36 | The `rank` here refer to the source rank on global process group (regardless 37 | of dist_group argument). 38 | """ 39 | device = torch.device(get_device_name() if not force_cpu_device else "cpu") 40 | 41 | if rank == src: 42 | if len(data) == 0: 43 | tensor_size = torch.tensor([0], dtype=torch.long, device=device) 44 | dist.broadcast(tensor_size, src=src, group=dist_group) 45 | else: 46 | serialized_data = pickle.dumps(data) 47 | size = len(serialized_data) 48 | 49 | tensor_data = torch.ByteTensor(np.frombuffer(serialized_data, dtype=np.uint8)).to(device) 50 | tensor_size = torch.tensor([size], dtype=torch.long, device=device) 51 | 52 | dist.broadcast(tensor_size, src=src, group=dist_group) 53 | dist.broadcast(tensor_data, src=src, group=dist_group) 54 | return data 55 | else: 56 | tensor_size = torch.tensor([0], dtype=torch.long, device=device) 57 | dist.broadcast(tensor_size, src=src, group=dist_group) 58 | size = tensor_size.item() 59 | 60 | if size == 0: 61 | return [] 62 | 63 | tensor_data = torch.empty(size, dtype=torch.uint8, device=device) 64 | dist.broadcast(tensor_data, src=src, group=dist_group) 65 | 66 | serialized_data = bytes(tensor_data.cpu().numpy()) 67 | data = pickle.loads(serialized_data) 68 | return data 69 | -------------------------------------------------------------------------------- /scripts/train_grpo.sh: -------------------------------------------------------------------------------- 1 | 2 | WAND_PROJECT='ReSeek' 3 | 4 | 5 | export BASE_MODEL='your/path/to/Qwen2.5-3B-Instruct' 6 | 7 | export EXPERIMENT_NAME=ReSeek-nq_hotpotqa_train-qwen2.5-3b-it-em-grpo_max_turn1 8 | set -x 9 | 10 | source /usr/local/Ascend/ascend-toolkit/set_env.sh 11 | source /usr/local/Ascend/nnal/atb/set_env.sh 12 | export TORCHDYNAMO_DISABLE=1 13 | export TORCH_COMPILE_DISABLE=1 14 | 15 | TRAIN_DATA_DIR=your/path/to/nq_search 16 | TEST_DATA_DIR=your/path/to/nq_search 17 | TIME_STAMP=$(date +%Y%m%d_%H%M%S) 18 | 19 | PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \ 20 | data.train_files=$TRAIN_DATA_DIR/train.parquet \ 21 | data.val_files=$TEST_DATA_DIR/test.parquet \ 22 | data.train_batch_size=512 \ 23 | data.val_batch_size=256 \ 24 | data.max_prompt_length=2048 \ 25 | data.max_response_length=500 \ 26 | data.max_start_length=2048 \ 27 | data.max_obs_length=500 \ 28 | data.shuffle=False \ 29 | algorithm.adv_estimator=grpo \ 30 | actor_rollout_ref.model.path=$BASE_MODEL \ 31 | actor_rollout_ref.model.enable_gradient_checkpointing=true \ 32 | actor_rollout_ref.model.use_remove_padding=True \ 33 | actor_rollout_ref.actor.optim.lr=1e-6 \ 34 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \ 35 | actor_rollout_ref.actor.use_kl_loss=true \ 36 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 37 | actor_rollout_ref.actor.ppo_micro_batch_size=64 \ 38 | actor_rollout_ref.actor.fsdp_config.param_offload=true \ 39 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 40 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 41 | actor_rollout_ref.rollout.name=vllm \ 42 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 43 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 44 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 45 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 46 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 47 | actor_rollout_ref.rollout.n_agent=5 \ 48 | actor_rollout_ref.rollout.temperature=1 \ 49 | algorithm.no_think_rl=false \ 50 | trainer.logger=['console','tensorboard'] \ 51 | trainer.balance_batch=false \ 52 | trainer.val_only=true \ 53 | trainer.val_before_train=true \ 54 | trainer.default_hdfs_dir=null \ 55 | trainer.n_gpus_per_node=1 \ 56 | trainer.nnodes=1 \ 57 | trainer.save_freq=100 \ 58 | trainer.test_freq=100 \ 59 | trainer.project_name=$WAND_PROJECT \ 60 | trainer.experiment_name=$EXPERIMENT_NAME \ 61 | trainer.total_epochs=1 \ 62 | trainer.default_hdfs_dir=null \ 63 | reward_model.reward_manager=naive \ 64 | max_turns=1 \ 65 | retriever.url="your/path/to/retrieve" \ 66 | retriever.topk=3 \ 67 | 2>&1 | tee logs/$EXPERIMENT_NAME_$TIME_STAMP.log 68 | # trainer.device=npu 69 | # trainer.total_training_steps=1005 \ -------------------------------------------------------------------------------- /verl/base_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | from dataclasses import fields # Import the fields function to inspect dataclass fields 17 | from typing import Any 18 | 19 | 20 | # BaseConfig class inherits from collections.abc.Mapping, which means it can act like a dictionary 21 | class BaseConfig(collections.abc.Mapping): 22 | """The BaseConfig provides omegaconf DictConfig-like interface for a dataclass config. 23 | 24 | The BaseConfig class implements the Mapping Abstract Base Class. 25 | This allows instances of this class to be used like dictionaries. 26 | """ 27 | 28 | def get(self, key: str, default: Any = None) -> Any: 29 | """Get the value associated with the given key. If the key does not exist, return the default value. 30 | 31 | Args: 32 | key (str): The attribute name to retrieve. 33 | default (Any, optional): The value to return if the attribute does not exist. Defaults to None. 34 | 35 | Returns: 36 | Any: The value of the attribute or the default value. 37 | """ 38 | try: 39 | return getattr(self, key) 40 | except AttributeError: 41 | return default 42 | 43 | def __getitem__(self, key: str): 44 | """Implement the [] operator for the class. Allows accessing attributes like dictionary items. 45 | 46 | Args: 47 | key (str): The attribute name to retrieve. 48 | 49 | Returns: 50 | Any: The value of the attribute. 51 | 52 | Raises: 53 | AttributeError: If the attribute does not exist. 54 | TypeError: If the key type is not string 55 | """ 56 | return getattr(self, key) 57 | 58 | def __iter__(self): 59 | """Implement the iterator protocol. Allows iterating over the attribute names of the instance. 60 | 61 | Yields: 62 | str: The name of each field in the dataclass. 63 | """ 64 | for f in fields(self): 65 | yield f.name 66 | 67 | def __len__(self): 68 | """ 69 | Return the number of fields in the dataclass. 70 | 71 | Returns: 72 | int: The number of fields in the dataclass. 73 | """ 74 | return len(fields(self)) 75 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/linear.py 15 | 16 | 17 | from megatron.core import tensor_parallel 18 | 19 | 20 | class QKVParallelLinear(tensor_parallel.ColumnParallelLinear): 21 | def __init__( 22 | self, 23 | input_size, 24 | num_heads, 25 | num_key_value_heads, 26 | head_dim, 27 | *, 28 | bias=True, 29 | gather_output=True, 30 | skip_bias_add=False, 31 | **kwargs, 32 | ): 33 | # Keep input parameters, and already restrict the head numbers 34 | self.input_size = input_size 35 | self.q_output_size = num_heads * head_dim 36 | self.kv_output_size = num_key_value_heads * head_dim 37 | self.head_dim = head_dim 38 | self.gather_output = gather_output 39 | self.skip_bias_add = skip_bias_add 40 | 41 | input_size = self.input_size 42 | output_size = (num_heads + 2 * num_key_value_heads) * self.head_dim 43 | 44 | super().__init__( 45 | input_size=input_size, 46 | output_size=output_size, 47 | bias=bias, 48 | gather_output=gather_output, 49 | skip_bias_add=skip_bias_add, 50 | **kwargs, 51 | ) 52 | 53 | 54 | class MergedColumnParallelLinear(tensor_parallel.ColumnParallelLinear): 55 | def __init__( 56 | self, 57 | input_size, 58 | gate_ouput_size, 59 | up_output_size, 60 | *, 61 | bias=True, 62 | gather_output=True, 63 | skip_bias_add=False, 64 | **kwargs, 65 | ): 66 | # Keep input parameters, and already restrict the head numbers 67 | self.input_size = input_size 68 | self.output_size = gate_ouput_size + up_output_size 69 | self.gather_output = gather_output 70 | self.skip_bias_add = skip_bias_add 71 | 72 | super().__init__( 73 | input_size=self.input_size, 74 | output_size=self.output_size, 75 | bias=bias, 76 | gather_output=gather_output, 77 | skip_bias_add=skip_bias_add, 78 | **kwargs, 79 | ) 80 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import time 17 | 18 | import ray 19 | from cupy.cuda.nccl import NcclCommunicator, get_unique_id 20 | from ray.util import list_named_actors 21 | 22 | 23 | @ray.remote 24 | class NCCLIDStore: 25 | def __init__(self, nccl_id): 26 | self._nccl_id = nccl_id 27 | 28 | def get(self): 29 | return self._nccl_id 30 | 31 | 32 | def get_nccl_id_store_by_name(name): 33 | all_actors = list_named_actors(all_namespaces=True) 34 | matched_actors = [actor for actor in all_actors if actor.get("name", None) == name] 35 | if len(matched_actors) == 1: 36 | actor = matched_actors[0] 37 | return ray.get_actor(**actor) 38 | elif len(matched_actors) > 1: 39 | logging.warning("multiple actors with same name found: %s", matched_actors) 40 | elif len(matched_actors) == 0: 41 | logging.info("failed to get any actor named %s", name) 42 | return None 43 | 44 | 45 | def create_nccl_communicator_in_ray( 46 | rank: int, world_size: int, group_name: str, max_retries: int = 100, interval_s: int = 5 47 | ): 48 | if rank == 0: 49 | nccl_id = get_unique_id() 50 | nccl_id_store = NCCLIDStore.options(name=group_name).remote(nccl_id) 51 | 52 | assert ray.get(nccl_id_store.get.remote()) == nccl_id 53 | communicator = NcclCommunicator( 54 | ndev=world_size, 55 | commId=nccl_id, 56 | rank=0, 57 | ) 58 | return communicator 59 | else: 60 | for i in range(max_retries): 61 | nccl_id_store = get_nccl_id_store_by_name(group_name) 62 | if nccl_id_store is not None: 63 | logging.info("nccl_id_store %s got", group_name) 64 | nccl_id = ray.get(nccl_id_store.get.remote()) 65 | logging.info("nccl id for %s got: %s", group_name, nccl_id) 66 | communicator = NcclCommunicator( 67 | ndev=world_size, 68 | commId=nccl_id, 69 | rank=rank, 70 | ) 71 | return communicator 72 | logging.info("failed to get nccl_id for %d time, sleep for %d seconds", i + 1, interval_s) 73 | time.sleep(interval_s) 74 | -------------------------------------------------------------------------------- /verl/tools/schemas.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import json 16 | from typing import Any, Literal 17 | 18 | from pydantic import BaseModel 19 | 20 | 21 | class OpenAIFunctionPropertySchema(BaseModel): 22 | """The schema of a parameter in OpenAI format.""" 23 | 24 | type: str 25 | description: str | None = None 26 | enum: list[str] | None = None 27 | 28 | 29 | class OpenAIFunctionParametersSchema(BaseModel): 30 | """The schema of parameters in OpenAI format.""" 31 | 32 | type: str 33 | properties: dict[str, OpenAIFunctionPropertySchema] 34 | required: list[str] 35 | 36 | 37 | class OpenAIFunctionSchema(BaseModel): 38 | """The schema of a function in OpenAI format.""" 39 | 40 | name: str 41 | description: str 42 | parameters: OpenAIFunctionParametersSchema 43 | strict: bool = False 44 | 45 | 46 | class OpenAIFunctionToolSchema(BaseModel): 47 | """The schema of a tool in OpenAI format.""" 48 | 49 | type: str 50 | function: OpenAIFunctionSchema 51 | 52 | 53 | class OpenAIFunctionParsedSchema(BaseModel): 54 | """The parsed schema of a tool in OpenAI format.""" 55 | 56 | name: str 57 | arguments: str # JSON string 58 | 59 | 60 | class OpenAIFunctionCallSchema(BaseModel): 61 | """The parsed schema of a tool in OpenAI format.""" 62 | 63 | name: str 64 | arguments: dict[str, Any] 65 | 66 | @staticmethod 67 | def from_openai_function_parsed_schema( 68 | parsed_schema: OpenAIFunctionParsedSchema, 69 | ) -> tuple["OpenAIFunctionCallSchema", bool]: 70 | has_decode_error = False 71 | try: 72 | arguments = json.loads(parsed_schema.arguments) 73 | except json.JSONDecodeError: 74 | arguments = {} 75 | has_decode_error = True 76 | # If the arguments is not a dict, it means the arguments is not a valid JSON string 77 | if not isinstance(arguments, dict): 78 | arguments = {} 79 | has_decode_error = True 80 | 81 | return OpenAIFunctionCallSchema(name=parsed_schema.name, arguments=arguments), has_decode_error 82 | 83 | 84 | class OpenAIFunctionToolCall(BaseModel): 85 | """The tool call in OpenAI format.""" 86 | 87 | id: str 88 | type: Literal["function"] = "function" 89 | function: OpenAIFunctionCallSchema 90 | -------------------------------------------------------------------------------- /verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | from megatron.core import parallel_state as mpu 18 | 19 | from .sequence_parallel import pad_to_sequence_parallel 20 | 21 | 22 | def compute_transformers_input_shapes(batches, meta_info): 23 | from flash_attn.bert_padding import unpad_input # flash 2 is a must for Megatron 24 | 25 | # pre-compute input shapes for each micro-batch at each pp stage 26 | input_shapes = [] 27 | for model_inputs in batches: 28 | input_ids = model_inputs["input_ids"] 29 | attention_mask = model_inputs["attention_mask"] 30 | input_ids_rmpad = unpad_input(input_ids.unsqueeze(dim=-1), attention_mask)[0] # (total_nnz, 1) 31 | if meta_info["sequence_parallel"]: 32 | input_ids_rmpad = pad_to_sequence_parallel(input_ids_rmpad) 33 | # compute shapes for model_inputs 34 | input_shapes.append( 35 | torch.Size( 36 | [ 37 | input_ids_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size(), 38 | 1, 39 | meta_info["hidden_size"], 40 | ] 41 | ) 42 | ) 43 | else: 44 | # compute shapes for model_inputs 45 | input_shapes.append(torch.Size([input_ids_rmpad.shape[0], 1, meta_info["hidden_size"]])) 46 | return input_shapes 47 | 48 | 49 | def make_batch_generator(batches, vpp_size): 50 | """ 51 | Creates a batch generator suitable for Megatron pipeline parallelism, 52 | handling virtual pipeline parallelism (VPP). 53 | 54 | If VPP is used (vpp_size > 1), it duplicates the batch iterator for each 55 | virtual pipeline stage. Otherwise, it returns a single iterator. 56 | 57 | Args: 58 | batches: An iterable (e.g., list) of micro-batches. 59 | vpp_size (int): The virtual pipeline model parallel size. 60 | 61 | Returns: 62 | An iterator or a list of iterators over the micro-batches. 63 | """ 64 | if vpp_size > 1: 65 | # has vpp 66 | batch_generator = [batches] * vpp_size # number of vpp chunks 67 | batch_generator = [iter(b) for b in batch_generator] 68 | else: 69 | # no vpp 70 | batch_generator = iter(batches) 71 | return batch_generator 72 | -------------------------------------------------------------------------------- /verl/trainer/config/algorithm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass, field 16 | from typing import Optional 17 | 18 | from verl.base_config import BaseConfig 19 | 20 | 21 | @dataclass(frozen=True) 22 | class KLControlConfig(BaseConfig): 23 | """Configuration for KL control.""" 24 | 25 | type: str = "fixed" # "fixed" or "adaptive" 26 | kl_coef: float = 0.001 # Initial coefficient for KL penalty 27 | horizon: int = 10000 # Horizon value for adaptive controller 28 | target_kl: float = 0.1 # Target KL divergence for adaptive controller 29 | 30 | 31 | @dataclass(frozen=True) 32 | class PFPPOConfig(BaseConfig): 33 | """Configuration for preference feedback PPO.""" 34 | 35 | reweight_method: str = "pow" # "pow", "max_min", or "max_random" 36 | weight_pow: float = 2.0 # Power used for weight scaling in "pow" method 37 | 38 | 39 | @dataclass(frozen=True) 40 | class FilterGroupsConfig(BaseConfig): 41 | """Configuration for filter groups (used in DAPO and Entropy).""" 42 | 43 | enable: bool = False # Whether to enable filter groups 44 | metric: Optional[str] = None # Metric to use for filtering: "acc", "score", "seq_reward", "seq_final_reward", etc. 45 | max_num_gen_batches: int = 0 # Non-positive values mean no upper limit 46 | 47 | 48 | @dataclass(frozen=True) 49 | class AlgoConfig(BaseConfig): 50 | """Configuration for the algorithm.""" 51 | 52 | gamma: float = 1.0 # Discount factor for future rewards 53 | lam: float = 1.0 # Trade-off between bias and variance in the GAE estimator 54 | adv_estimator: str = "gae" # Advantage estimator type: "gae", "grpo", "reinforce_plus_plus", etc. 55 | norm_adv_by_std_in_grpo: bool = True # Whether to normalize advantages by std (specific to GRPO) 56 | use_kl_in_reward: bool = False # Whether to enable in-reward KL penalty 57 | kl_penalty: str = "kl" # How to estimate KL divergence: "kl", "abs", "mse", "low_var_kl", or "full" 58 | kl_ctrl: KLControlConfig = field(default_factory=KLControlConfig) # KL control configuration 59 | use_pf_ppo: bool = False # Whether to enable preference feedback PPO 60 | pf_ppo: Optional[PFPPOConfig] = None # Preference feedback PPO settings 61 | 62 | # Filter groups parameters (used in DAPO and Entropy) 63 | filter_groups: Optional[FilterGroupsConfig] = None # Filter groups configuration 64 | no_think_rl: bool = True # Whether to enable no-think RL 65 | -------------------------------------------------------------------------------- /verl/trainer/main_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Offline evaluate the performance of a generated file using reward model and ground truth verifier. 16 | The input is a parquet file that contains N generated sequences and (optional) the ground truth. 17 | 18 | """ 19 | 20 | from collections import defaultdict 21 | 22 | import hydra 23 | import numpy as np 24 | import pandas as pd 25 | import ray 26 | from tqdm import tqdm 27 | 28 | from verl.trainer.ppo.reward import get_custom_reward_fn 29 | from verl.utils.fs import copy_to_local 30 | 31 | 32 | @ray.remote 33 | def process_item(reward_fn, data_source, response_lst, reward_data): 34 | ground_truth = reward_data["ground_truth"] 35 | score_lst = [reward_fn(data_source, r, ground_truth) for r in response_lst] 36 | return data_source, np.mean(score_lst) 37 | 38 | 39 | @hydra.main(config_path="config", config_name="evaluation", version_base=None) 40 | def main(config): 41 | local_path = copy_to_local(config.data.path, use_shm=config.data.get("use_shm", False)) 42 | dataset = pd.read_parquet(local_path) 43 | responses = dataset[config.data.response_key] 44 | data_sources = dataset[config.data.data_source_key] 45 | reward_model_data = dataset[config.data.reward_model_key] 46 | 47 | total = len(dataset) 48 | 49 | # Initialize Ray 50 | if not ray.is_initialized(): 51 | ray.init(num_cpus=config.ray_init.num_cpus) 52 | 53 | # evaluate test_score based on data source 54 | data_source_reward = defaultdict(list) 55 | compute_score = get_custom_reward_fn(config) 56 | 57 | # Create remote tasks 58 | remote_tasks = [ 59 | process_item.remote(compute_score, data_sources[i], responses[i], reward_model_data[i]) for i in range(total) 60 | ] 61 | 62 | # Process results as they come in 63 | with tqdm(total=total) as pbar: 64 | while len(remote_tasks) > 0: 65 | # Use ray.wait to get completed tasks 66 | done_ids, remote_tasks = ray.wait(remote_tasks) 67 | for result_id in done_ids: 68 | data_source, score = ray.get(result_id) 69 | data_source_reward[data_source].append(score) 70 | pbar.update(1) 71 | 72 | metric_dict = {} 73 | for data_source, rewards in data_source_reward.items(): 74 | metric_dict[f"test_score/{data_source}"] = np.mean(rewards) 75 | 76 | print(metric_dict) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT 16 | """ 17 | 18 | from torch.distributed.device_mesh import DeviceMesh 19 | 20 | from verl import DataProto 21 | from verl.protocol import all_gather_data_proto 22 | from verl.utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group 23 | 24 | from .base import BaseShardingManager 25 | 26 | 27 | class FSDPUlyssesShardingManager(BaseShardingManager): 28 | """ 29 | Sharding manager to support data resharding when using FSDP + Ulysses 30 | """ 31 | 32 | def __init__(self, device_mesh: DeviceMesh): 33 | super().__init__() 34 | self.device_mesh = device_mesh 35 | self.seed_offset = 12345 36 | 37 | def __enter__(self): 38 | if self.device_mesh is not None: 39 | # We have a global SP group 40 | # so we have to change to use model-specific sp group 41 | self.prev_sp_group = get_ulysses_sequence_parallel_group() 42 | set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group()) 43 | # TODO: check how to set seed for each model 44 | 45 | def __exit__(self, exc_type, exc_value, traceback): 46 | # restore random states 47 | if self.device_mesh is not None: 48 | # revert to previous sp group 49 | set_ulysses_sequence_parallel_group(self.prev_sp_group) 50 | # TODO: check how to set seed for each model 51 | 52 | def preprocess_data(self, data: DataProto) -> DataProto: 53 | """ 54 | AllGather data from sp region 55 | This is because the data is first sharded along the FSDP dimension as we utilize the DP_COMPUTE 56 | In Ulysses, we need to make sure the same data is used across a SP group 57 | """ 58 | if self.device_mesh is not None: 59 | group = self.device_mesh["sp"].get_group() 60 | 61 | all_gather_data_proto(data=data, process_group=group) 62 | return data 63 | 64 | def postprocess_data(self, data: DataProto) -> DataProto: 65 | """ 66 | Split the data to follow FSDP partition 67 | """ 68 | if self.device_mesh is not None: 69 | sp_size = self.device_mesh["sp"].size() 70 | sp_rank = self.device_mesh["sp"].get_local_rank() 71 | data = data.chunk(chunks=sp_size)[sp_rank] 72 | return data 73 | -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from megatron.core.optimizer import OptimizerConfig 17 | from megatron.core.optimizer import get_megatron_optimizer as get_megatron_optimizer_native 18 | from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler 19 | 20 | 21 | def get_megatron_optimizer( 22 | model, 23 | config: OptimizerConfig, 24 | no_weight_decay_cond=None, 25 | scale_lr_cond=None, 26 | lr_mult=1.0, 27 | ): 28 | # Base optimizer. 29 | return get_megatron_optimizer_native( 30 | config=config, 31 | model_chunks=model, 32 | no_weight_decay_cond=no_weight_decay_cond, 33 | scale_lr_cond=scale_lr_cond, 34 | lr_mult=lr_mult, 35 | ) 36 | 37 | 38 | def get_megatron_optimizer_param_scheduler( 39 | optimizer, 40 | config, 41 | ): 42 | """ 43 | Get the optimizer parameter scheduler for Megatron. 44 | """ 45 | if config.get("lr_decay_steps", None) is None: 46 | config.lr_decay_steps = config.total_training_steps 47 | wsd_decay_steps = None 48 | if config.get("lr_wsd_decay_steps", None) is not None: 49 | wsd_decay_steps = config.lr_wsd_decay_steps 50 | if config.get("lr_warmup_steps_ratio", None) is not None and ( 51 | config.get("lr_warmup_steps", None) is None or config.lr_warmup_steps <= 0 52 | ): 53 | config.lr_warmup_steps = int(config.lr_warmup_steps_ratio * config.lr_decay_steps) 54 | 55 | opt_param_scheduler = OptimizerParamScheduler( 56 | optimizer, 57 | init_lr=config.lr_warmup_init, 58 | max_lr=config.lr, 59 | min_lr=config.min_lr, 60 | lr_warmup_steps=config.lr_warmup_steps, 61 | lr_decay_steps=config.lr_decay_steps, 62 | lr_decay_style=config.lr_decay_style, 63 | start_wd=config.weight_decay, 64 | end_wd=config.weight_decay, 65 | wd_incr_steps=config.total_training_steps, 66 | wd_incr_style=config.weight_decay_incr_style, 67 | use_checkpoint_opt_param_scheduler=config.use_checkpoint_opt_param_scheduler, 68 | override_opt_param_scheduler=(not config.use_checkpoint_opt_param_scheduler), 69 | wsd_decay_steps=wsd_decay_steps, 70 | lr_wsd_decay_style=config.lr_wsd_decay_style, 71 | ) 72 | 73 | return opt_param_scheduler 74 | 75 | 76 | def get_megatron_last_lr(optimizer): 77 | """ 78 | Get the last learning rate from the optimizer parameter scheduler. 79 | """ 80 | return optimizer.param_groups[0]["lr"] 81 | -------------------------------------------------------------------------------- /verl/interactions/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | from typing import Any, Dict, List, Optional, Tuple 17 | from uuid import uuid4 18 | 19 | 20 | class BaseInteraction: 21 | def __init__(self, config: Dict[str, Any]): 22 | self.config = config 23 | self.name: str = config.get("name", "interaction_agent") # More general agent default role name 24 | 25 | async def start_interaction(self, instance_id: Optional[str] = None, **kwargs) -> str: 26 | """Create a tool instance. 27 | 28 | Args: 29 | instance_id: The instance id of the tool. 30 | 31 | Returns: 32 | The instance id of the tool. 33 | """ 34 | if instance_id is None: 35 | return str(uuid4()) 36 | else: 37 | return instance_id 38 | 39 | async def generate_response( 40 | self, instance_id: str, messages: List[Dict[str, Any]], **kwargs 41 | ) -> Tuple[bool, str, float, Dict[str, Any]]: # More clear response generation method 42 | """ 43 | Generates a response for the current turn of interaction. 44 | Returns a tuple containing: 45 | - should_terminate_sequence (bool): True if the interaction sequence should end. 46 | - response_content (str): The textual content of the response. 47 | - current_turn_score (float): The score for this specific turn/response. 48 | - additional_data (dict): Any extra information or metadata. 49 | """ 50 | should_terminate_sequence: bool = False # if True, end rollout 51 | response_content: str = "Your current result seems acceptable." 52 | current_turn_score: float = 0.8 53 | additional_data: Dict[str, Any] = {} 54 | return should_terminate_sequence, response_content, current_turn_score, additional_data 55 | 56 | async def calculate_score(self) -> float: # More clear score calculation method 57 | """ 58 | Calculates a score for the interaction, 59 | potentially considering aspects like partial exposure & in-context task switching. 60 | should be invoke at turn-level 61 | """ 62 | # ...implement the logic to calculate turn-level score... 63 | score = 0.0 64 | return score 65 | 66 | async def finalize_interaction(self) -> None: # More clear interaction end and resource release method 67 | """ 68 | Finalizes the interaction session and releases any associated state or resources. 69 | Simulates: release state 70 | """ 71 | # ...implement the logic to release state... 72 | pass 73 | --------------------------------------------------------------------------------