├── docs
    ├── .nojekyll
    └── static
    │   ├── pdfs
    │       └── sample.pdf
    │   └── images
    │       ├── cases.png
    │       ├── hf-logo.png
    │       ├── datapipe.png
    │       ├── framework.png
    │       ├── eval_prior.png
    │       ├── leaderboard.png
    │       └── main_result.png
├── search
    ├── __init__.py
    ├── llm_agent
    │   └── __init__.py
    ├── install.sh
    ├── retrieval
    │   ├── retrieval_request.py
    │   ├── build_index.sh
    │   ├── build_index_vllm_api.sh
    │   └── read_npz_simple.py
    └── retrieval_launch.sh
├── verl
    ├── version
    │   └── version
    ├── trainer
    │   ├── runtime_env.yaml
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── main_ppo.cpython-310.pyc
    │   │   └── constants_ppo.cpython-310.pyc
    │   ├── ppo
    │   │   ├── __pycache__
    │   │   │   ├── reward.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── core_algos.cpython-310.pyc
    │   │   │   ├── metric_utils.cpython-310.pyc
    │   │   │   └── ray_trainer.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── config
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── algorithm.cpython-310.pyc
    │   │   ├── evaluation.yaml
    │   │   ├── __init__.py
    │   │   ├── generation.yaml
    │   │   ├── sft_trainer.yaml
    │   │   └── algorithm.py
    │   ├── __init__.py
    │   ├── constants_ppo.py
    │   └── main_eval.py
    ├── __pycache__
    │   ├── __init__.cpython-310.pyc
    │   ├── __init__.cpython-313.pyc
    │   ├── protocol.cpython-310.pyc
    │   ├── protocol.cpython-313.pyc
    │   └── base_config.cpython-310.pyc
    ├── utils
    │   ├── __pycache__
    │   │   ├── fs.cpython-310.pyc
    │   │   ├── config.cpython-310.pyc
    │   │   ├── device.cpython-310.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── hdfs_io.cpython-310.pyc
    │   │   ├── tracking.cpython-310.pyc
    │   │   ├── ulysses.cpython-310.pyc
    │   │   ├── fsdp_utils.cpython-310.pyc
    │   │   ├── net_utils.cpython-310.pyc
    │   │   ├── ray_utils.cpython-310.pyc
    │   │   ├── tokenizer.cpython-310.pyc
    │   │   ├── vllm_utils.cpython-310.pyc
    │   │   ├── flops_counter.cpython-310.pyc
    │   │   ├── import_utils.cpython-310.pyc
    │   │   ├── logging_utils.cpython-310.pyc
    │   │   ├── py_functional.cpython-310.pyc
    │   │   ├── torch_dtypes.cpython-310.pyc
    │   │   ├── seqlen_balancing.cpython-310.pyc
    │   │   ├── torch_functional.cpython-310.pyc
    │   │   └── activation_offload.cpython-310.pyc
    │   ├── metric
    │   │   ├── __pycache__
    │   │   │   ├── utils.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── utils.py
    │   ├── dataset
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── sampler.cpython-310.pyc
    │   │   │   ├── rl_dataset.cpython-310.pyc
    │   │   │   ├── rm_dataset.cpython-310.pyc
    │   │   │   └── sft_dataset.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── sampler.py
    │   ├── debug
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── performance.py
    │   ├── logger
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── aggregate_logger.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── profiler
    │   │   ├── __pycache__
    │   │   │   ├── config.cpython-310.pyc
    │   │   │   ├── profile.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── performance.cpython-310.pyc
    │   │   ├── empty_annotations.py
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── reward_score
    │   │   ├── __pycache__
    │   │   │   ├── gsm8k.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── search_r1_like_qa_em.cpython-310.pyc
    │   │   │   ├── search_r1_like_qa_em_s3.cpython-310.pyc
    │   │   │   └── search_r1_like_qa_em_s4.cpython-310.pyc
    │   │   ├── prime_code
    │   │   │   ├── README.md
    │   │   │   └── utils.py
    │   │   ├── math_batch.py
    │   │   ├── geo3k.py
    │   │   ├── math_verify.py
    │   │   └── gsm8k.py
    │   ├── checkpoint
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── checkpoint_manager.cpython-310.pyc
    │   │   │   └── fsdp_checkpoint_manager.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── megatron
    │   │   ├── __init__.py
    │   │   ├── memory.py
    │   │   ├── sequence_parallel.py
    │   │   ├── dist_checkpointing.py
    │   │   ├── pipeline_parallel.py
    │   │   └── optimizer.py
    │   ├── experimental
    │   │   └── __init__.py
    │   ├── rendezvous
    │   │   ├── __init__.py
    │   │   └── ray_backend.py
    │   ├── __init__.py
    │   ├── logging_utils.py
    │   ├── kernel
    │   │   └── __init__.py
    │   ├── distributed.py
    │   ├── net_utils.py
    │   ├── config.py
    │   ├── torch_dtypes.py
    │   └── device.py
    ├── tools
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── schemas.cpython-310.pyc
    │   │   ├── base_tool.cpython-310.pyc
    │   │   └── search_tool.cpython-310.pyc
    │   ├── utils
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── tool_registry.cpython-310.pyc
    │   │   │   └── search_r1_like_utils.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── mcp_clients
    │   │   │   └── utils.py
    │   ├── __init__.py
    │   ├── mcp_search_tool.py
    │   └── schemas.py
    ├── interactions
    │   ├── __pycache__
    │   │   ├── base.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   ├── utils
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── interaction_registry.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── base.py
    ├── models
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── registry.cpython-310.pyc
    │   ├── transformers
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── npu_patch.cpython-310.pyc
    │   │   │   └── monkey_patch.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── npu_patch.py
    │   ├── __init__.py
    │   ├── llama
    │   │   ├── __init__.py
    │   │   └── megatron
    │   │   │   ├── checkpoint_utils
    │   │   │       └── __init__.py
    │   │   │   ├── __init__.py
    │   │   │   └── layers
    │   │   │       ├── __init__.py
    │   │   │       └── parallel_rmsnorm.py
    │   ├── qwen2
    │   │   ├── __init__.py
    │   │   └── megatron
    │   │   │   ├── checkpoint_utils
    │   │   │       └── __init__.py
    │   │   │   ├── layers
    │   │   │       ├── __init__.py
    │   │   │       ├── parallel_rmsnorm.py
    │   │   │       └── parallel_linear.py
    │   │   │   └── __init__.py
    │   ├── mcore
    │   │   ├── __init__.py
    │   │   ├── qwen2_5_vl
    │   │   │   └── __init__.py
    │   │   └── mbridge.py
    │   ├── README.md
    │   ├── registry.py
    │   └── weight_loader_registry.py
    ├── workers
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── fsdp_workers.cpython-310.pyc
    │   ├── actor
    │   │   ├── __pycache__
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── dp_actor.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── base.py
    │   ├── critic
    │   │   ├── __pycache__
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── dp_critic.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── base.py
    │   ├── rollout
    │   │   ├── __pycache__
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── schemas.cpython-310.pyc
    │   │   │   └── hf_rollout.cpython-310.pyc
    │   │   ├── naive
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── naive_rollout.cpython-310.pyc
    │   │   │   └── __init__.py
    │   │   ├── sglang_rollout
    │   │   │   ├── __pycache__
    │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── sglang_rollout.cpython-310.pyc
    │   │   │   ├── __init__.py
    │   │   │   └── utils.py
    │   │   ├── vllm_rollout
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── vllm_rollout_spmd.cpython-310.pyc
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   └── base.py
    │   ├── reward_manager
    │   │   ├── __pycache__
    │   │   │   ├── dapo.cpython-310.pyc
    │   │   │   ├── batch.cpython-310.pyc
    │   │   │   ├── naive.cpython-310.pyc
    │   │   │   ├── prime.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── registry.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   └── registry.py
    │   ├── sharding_manager
    │   │   ├── __pycache__
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── fsdp_vllm.cpython-310.pyc
    │   │   │   ├── fsdp_sglang.cpython-310.pyc
    │   │   │   └── fsdp_ulysses.cpython-310.pyc
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   └── fsdp_ulysses.py
    │   ├── __init__.py
    │   └── reward_model
    │   │   ├── __init__.py
    │   │   ├── megatron
    │   │       └── __init__.py
    │   │   └── base.py
    ├── third_party
    │   ├── __pycache__
    │   │   └── __init__.cpython-310.pyc
    │   ├── sglang
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── parallel_state.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── vllm
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-310.pyc
    │   │   └── __init__.py
    │   └── __init__.py
    ├── single_controller
    │   ├── __pycache__
    │   │   └── __init__.cpython-310.pyc
    │   ├── ray
    │   │   ├── __pycache__
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   │   └── __init__.py
    │   ├── base
    │   │   ├── __pycache__
    │   │   │   ├── worker.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── decorator.cpython-310.pyc
    │   │   │   └── worker_group.cpython-310.pyc
    │   │   ├── register_center
    │   │   │   ├── __pycache__
    │   │   │   │   ├── ray.cpython-310.pyc
    │   │   │   │   └── __init__.cpython-310.pyc
    │   │   │   ├── __init__.py
    │   │   │   └── ray.py
    │   │   ├── megatron
    │   │   │   ├── __init__.py
    │   │   │   └── worker_group.py
    │   │   └── __init__.py
    │   └── __init__.py
    ├── experimental
    │   ├── __init__.py
    │   └── agent_loop
    │   │   ├── __init__.py
    │   │   └── single_turn_agent_loop.py
    ├── model_merger
    │   ├── __init__.py
    │   └── __main__.py
    ├── __init__.py
    └── base_config.py
├── .gitignore
├── requirements-npu.txt
├── utils
    ├── __init__.py
    └── install_vllm_sglang_mcore.sh
└── scripts
    └── train_grpo.sh


/docs/.nojekyll:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/search/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/search/llm_agent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.4.1.dev
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 | *.pyo
4 | *.pyd
5 | *.pyw
6 | *.pyz
7 | *.pywz
8 | *.pyzz


--------------------------------------------------------------------------------
/docs/static/pdfs/sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/pdfs/sample.pdf


--------------------------------------------------------------------------------
/docs/static/images/cases.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/cases.png


--------------------------------------------------------------------------------
/docs/static/images/hf-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/hf-logo.png


--------------------------------------------------------------------------------
/docs/static/images/datapipe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/datapipe.png


--------------------------------------------------------------------------------
/docs/static/images/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/framework.png


--------------------------------------------------------------------------------
/docs/static/images/eval_prior.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/eval_prior.png


--------------------------------------------------------------------------------
/docs/static/images/leaderboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/leaderboard.png


--------------------------------------------------------------------------------
/docs/static/images/main_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/docs/static/images/main_result.png


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./
2 | excludes: ["/.git/"]
3 | env_vars:
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 | 


--------------------------------------------------------------------------------
/verl/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/__init__.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/__init__.cpython-313.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/protocol.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/protocol.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/protocol.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/protocol.cpython-313.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fs.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/fs.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/base_config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/__pycache__/base_config.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/config.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/device.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/device.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/__pycache__/schemas.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/schemas.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/hdfs_io.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/hdfs_io.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tracking.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/tracking.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ulysses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/ulysses.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/interactions/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/__pycache__/registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/__pycache__/registry.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/__pycache__/base_tool.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/base_tool.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/__pycache__/search_tool.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/__pycache__/search_tool.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/main_ppo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/main_ppo.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/net_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/net_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ray_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/ray_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/vllm_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/vllm_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/flops_counter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/flops_counter.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/import_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/import_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/logging_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/logging_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/py_functional.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/py_functional.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/metric/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/metric/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/interactions/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/constants_ppo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/__pycache__/constants_ppo.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_functional.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/torch_functional.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/sampler.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/sampler.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/debug/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/logger/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/logger/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/metric/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/metric/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/profiler/__pycache__/config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/config.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/profiler/__pycache__/profile.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/profile.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/config/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/config/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/config/__pycache__/algorithm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/config/__pycache__/algorithm.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/activation_offload.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/__pycache__/activation_offload.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/profiler/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/schemas.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/schemas.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/interactions/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/ray/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/sglang/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/sglang/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/utils/__pycache__/tool_registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/tool_registry.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/profiler/__pycache__/performance.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/profiler/__pycache__/performance.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/transformers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/transformers/__pycache__/npu_patch.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/npu_patch.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/worker.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/tools/utils/__pycache__/search_r1_like_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/tools/utils/__pycache__/search_r1_like_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__pycache__/registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/reward_manager/__pycache__/registry.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/sglang/__pycache__/parallel_state.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/third_party/sglang/__pycache__/parallel_state.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_sglang.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_sglang.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/interactions/utils/__pycache__/interaction_registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/interactions/utils/__pycache__/interaction_registry.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/search_r1_like_qa_em.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s3.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s4.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/utils/reward_score/__pycache__/search_r1_like_qa_em_s4.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TencentBAC/ReSeek/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc


--------------------------------------------------------------------------------
/search/install.sh:
--------------------------------------------------------------------------------
1 | 
2 | pip install transformers datasets pyserini
3 | 
4 | ## install the gpu version faiss to guarantee efficient RL rollout
5 | conda install -c pytorch -c nvidia faiss-gpu=1.8.0
6 | 
7 | ## API function
8 | pip install uvicorn fastapi sentence_transformers==3.3.1


--------------------------------------------------------------------------------
/requirements-npu.txt:
--------------------------------------------------------------------------------
 1 | # requirements.txt records the full set of dependencies for development
 2 | accelerate
 3 | codetiming
 4 | datasets
 5 | dill
 6 | hydra-core
 7 | numpy<2.0.0
 8 | pandas
 9 | peft
10 | pyarrow>=15.0.0
11 | pybind11
12 | pylatexenc
13 | tensordict<=0.6.2
14 | transformers==4.52.4
15 | ray==2.46.0
16 | wandb
17 | mathruler
18 | torchdata
19 | einops
20 | qwen_vl_utils
21 | torchvision==0.20.1
22 | 


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
 3 |   prompt_key: prompt
 4 |   response_key: responses
 5 |   data_source_key: data_source
 6 |   reward_model_key: reward_model
 7 | 
 8 | custom_reward_function:
 9 |   path: null
10 |   name: compute_score
11 | 
12 | ray_init:
13 |   num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.
14 |   timeline_json_file: null
15 | 


--------------------------------------------------------------------------------
/search/retrieval/retrieval_request.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | # URL for your local FastAPI server
 4 | url = "http://127.0.0.1:8000/retrieve"
 5 | 
 6 | # Example payload
 7 | payload = {
 8 |     "queries": ["What is the capital of France?", "Explain neural networks."] * 200,
 9 |     "topk": 5,
10 |     "return_scores": True
11 | }
12 | 
13 | # Send POST request
14 | response = requests.post(url, json=payload)
15 | 
16 | # Raise an exception if the request failed
17 | response.raise_for_status()
18 | 
19 | # Get the JSON response
20 | retrieved_data = response.json()
21 | 
22 | print("Response from server:")
23 | print(retrieved_data)
24 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/search/retrieval_launch.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | file_path=${DATA_DIR}/wiki-18
 3 | index_file=$file_path/e5_Flat.index
 4 | corpus_file=$file_path/wiki-18.jsonl
 5 | retriever_name=e5
 6 | retriever_path=${MODEL_DIR}/e5-base-v2
 7 | 
 8 | python scripts/runs/reseek/reseek_search/search/retrieval_server.py --index_path $index_file \
 9 |                                             --corpus_path $corpus_file \
10 |                                             --topk 3 \
11 |                                             --retriever_name $retriever_name \
12 |                                             --retriever_model $retriever_path \
13 |                                             --faiss_gpu
14 | 


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/experimental/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/model_merger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/experimental/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 


--------------------------------------------------------------------------------
/verl/tools/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 


--------------------------------------------------------------------------------
/verl/interactions/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | 
14 | from .sglang_rollout import SGLangRollout
15 | 
16 | __all__ = ["SGLangRollout"]
17 | 


--------------------------------------------------------------------------------
/verl/utils/metric/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .utils import reduce_metrics
16 | 
17 | __all__ = ["reduce_metrics"]
18 | 


--------------------------------------------------------------------------------
/verl/interactions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023-2024 SGLang Team
 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 
17 | __all__ = ["NaiveRollout"]
18 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 
17 | __all__ = ["BasePPORewardModel"]
18 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 
17 | __all__ = ["MegatronRewardModel"]
18 | 


--------------------------------------------------------------------------------
/verl/experimental/agent_loop/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .agent_loop import AgentLoopBase, AgentLoopManager
16 | 
17 | __all__ = ["AgentLoopBase", "AgentLoopManager"]
18 | 


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # APIs kept for backward compatibility purpose
16 | # For new features please develop in verl/utils/profiler/
17 | from ..profiler import *  # noqa
18 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/prime_code/README.md:
--------------------------------------------------------------------------------
 1 | ## LiveCodeBench
 2 | 
 3 | ### Introduction
 4 | [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench) provides holistic and contamination-free evaluation of coding capabilities of LLMs. Particularly, LiveCodeBench continuously collects new problems over time from contests across three competition platforms -- LeetCode, AtCoder, and CodeForces. 
 5 | 
 6 | ### How to reproduce
 7 | Our evaluation is grounded on the version found in LiveCodeBench.
 8 | > **Installation**
 9 | ```bash
10 | # Make sure the CUDA version > 12.0.
11 | pip install -r requirements.txt
12 | pip install flash-attn --no-build-isolation
13 | ```
14 | 
15 | ### Acknowleage
16 | Thank you to the [LiveCodeBench](https://livecodebench.github.io/leaderboard.html) team for their contributions to the open-source community.


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .hf_rollout import HFRollout
17 | from .naive import NaiveRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 
19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"]
20 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup
17 | 
18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"]
19 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     },
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/verl/trainer/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .algorithm import AlgoConfig, FilterGroupsConfig, KLControlConfig, PFPPOConfig
16 | 
17 | __all__ = [
18 |     "AlgoConfig",
19 |     "FilterGroupsConfig",
20 |     "KLControlConfig",
21 |     "PFPPOConfig",
22 | ]
23 | 


--------------------------------------------------------------------------------
/verl/utils/debug/performance.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # APIs kept for backward compatibility purpose
16 | # This file is deprecated, for new features please develop in profiler/performance.py
17 | from verl.utils.profiler.performance import simple_timer, reduce_timing  # noqa
18 | 


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import config, tokenizer
16 | from .config import omega_conf_to_dataclass
17 | from .tokenizer import hf_processor, hf_tokenizer
18 | 
19 | __all__ = tokenizer.__all__ + config.__all__ + ["hf_processor", "hf_tokenizer", "omega_conf_to_dataclass"]
20 | 


--------------------------------------------------------------------------------
/verl/trainer/constants_ppo.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | PPO_RAY_RUNTIME_ENV = {
16 |     "env_vars": {
17 |         "TOKENIZERS_PARALLELISM": "true",
18 |         "NCCL_DEBUG": "WARN",
19 |         "VLLM_LOGGING_LEVEL": "WARN",
20 |         "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true",
21 |     },
22 | }
23 | 


--------------------------------------------------------------------------------
/verl/models/mcore/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .registry import get_mcore_forward_fn, get_mcore_weight_converter, hf_to_mcore_config, init_mcore_model
17 | 
18 | __all__ = ["hf_to_mcore_config", "init_mcore_model", "get_mcore_forward_fn", "get_mcore_weight_converter"]
19 | 


--------------------------------------------------------------------------------
/verl/models/mcore/qwen2_5_vl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 3 | # Copyright (c) 2024 Alibaba PAI Team.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | from .model import Qwen2_5VLModel
19 | from .vision_config import get_vision_model_config, get_vision_projection_config
20 | 
21 | __all__ = ["Qwen2_5VLModel", "get_vision_model_config", "get_vision_projection_config"]
22 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | 
17 | from verl import DataProto
18 | 
19 | __all__ = ["BaseRollout"]
20 | 
21 | 
22 | class BaseRollout(ABC):
23 |     """Base class for rollout."""
24 | 
25 |     @abstractmethod
26 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
27 |         """Generate sequences"""
28 |         pass
29 | 


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from . import base
17 | from .base import *
18 | 
19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
20 | 
21 | # Note(haibin.lin): single_controller.__version__ is deprecated
22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f:
23 |     __version__ = f.read().strip()
24 | 
25 | 
26 | __all__ = base.__all__
27 | 


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import (
16 |     RayClassWithInitArgs,
17 |     RayResourcePool,
18 |     RayWorkerGroup,
19 |     create_colocated_worker_cls,
20 |     create_colocated_worker_cls_fused,
21 | )
22 | 
23 | __all__ = [
24 |     "RayClassWithInitArgs",
25 |     "RayResourcePool",
26 |     "RayWorkerGroup",
27 |     "create_colocated_worker_cls",
28 |     "create_colocated_worker_cls_fused",
29 | ]
30 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from .aggregate_logger import (
17 |     DecoratorLoggerBase,
18 |     LocalLogger,
19 |     log_with_rank,
20 |     print_rank_0,
21 |     print_with_rank,
22 |     print_with_rank_and_timer,
23 | )
24 | 
25 | __all__ = [
26 |     "LocalLogger",
27 |     "DecoratorLoggerBase",
28 |     "print_rank_0",
29 |     "print_with_rank",
30 |     "print_with_rank_and_timer",
31 |     "log_with_rank",
32 | ]
33 | 


--------------------------------------------------------------------------------
/verl/models/mcore/mbridge.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | try:
17 |     from mbridge import AutoBridge
18 |     from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model
19 | except ImportError:
20 |     print("mbridge package not found. Please install mbridge with `pip install verl[mcore]` or `pip install mbridge`")
21 |     raise
22 | 
23 | __all__ = ["AutoBridge", "make_value_model", "freeze_moe_router"]
24 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelQwen2Attention
16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad
17 | from .parallel_mlp import ParallelQwen2MLP
18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm
19 | 
20 | __all__ = [
21 |     "ParallelQwen2Attention",
22 |     "ParallelQwen2DecoderLayer",
23 |     "ParallelQwen2DecoderLayerRmPad",
24 |     "ParallelQwen2MLP",
25 |     "ParallelQwen2RMSNorm",
26 | ]
27 | 


--------------------------------------------------------------------------------
/search/retrieval/build_index.sh:
--------------------------------------------------------------------------------
 1 | corpus_file=/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl # jsonl
 2 | save_dir=/group/40077/shyuli/datasets/RL/wiki
 3 | retriever_name=conan
 4 | retriever_model=/group/40077/shyuli/models/ours/embedding/conan-0827/ckpts
 5 | # /group/40077/shyuli/models/embedding/bge-large-en-v1.5
 6 | # /group/40077/shyuli/models/ours/embedding/conan-0827/ckpts
 7 | # /group/40077/shyuli/models/embedding/e5-base-v2
 8 | # /group/40077/shyuli/models/embedding/qwen/Qwen3-Embedding-0.6B
 9 | #
10 | #
11 | # 
12 | # 
13 | 
14 | # change faiss_type to HNSW32/64/128 for ANN indexing
15 | # change retriever_name to bm25 for BM25 indexing
16 | # 使用多 GPU 进行 sentence_transformers 编码
17 | CUDA_VISIBLE_DEVICES=0,1,2,3 python index_builder.py \
18 |     --retrieval_method $retriever_name \
19 |     --model_path $retriever_model \
20 |     --corpus_path $corpus_file \
21 |     --save_dir $save_dir \
22 |     --use_fp16 \
23 |     --max_length 256 \
24 |     --batch_size 64 \
25 |     --pooling_method mean \
26 |     --faiss_type Flat \
27 |     --save_embedding \
28 |     --embedding_path /group/40077/shyuli/datasets/RL/wiki/emb_conan_slices


--------------------------------------------------------------------------------
/verl/utils/reward_score/math_batch.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Individual Contributor: Mert Unsal
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .math import compute_score
16 | 
17 | 
18 | def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
19 |     """
20 |     This is a demonstration of how the batched reward function should look like.
21 |     Typically, you want to use batched reward to speed up the process with parallelization
22 |     """
23 |     return [
24 |         compute_score(solution_str, ground_truth) for solution_str, ground_truth in zip(solution_strs, ground_truths)
25 |     ]
26 | 


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import os
17 | 
18 | import torch
19 | 
20 | 
21 | def set_basic_config(level):
22 |     """
23 |     This function sets the global logging format and level. It will be called when import verl
24 |     """
25 |     logging.basicConfig(format="%(levelname)s:%(asctime)s:%(message)s", level=level)
26 | 
27 | 
28 | def log_to_file(string):
29 |     print(string)
30 |     if os.path.isdir("logs"):
31 |         with open(f"logs/log_{torch.distributed.get_rank()}", "a+") as f:
32 |             f.write(string + "\n")
33 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 |     def __init__(self):
23 |         self.timing = {}
24 | 
25 |     def __enter__(self):
26 |         pass
27 | 
28 |     def __exit__(self, exc_type, exc_value, traceback):
29 |         pass
30 | 
31 |     def preprocess_data(self, data: DataProto) -> DataProto:
32 |         return data
33 | 
34 |     def postprocess_data(self, data: DataProto) -> DataProto:
35 |         return data
36 | 


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 PRIME team and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .registry import get_reward_manager_cls, register  # noqa: I001
16 | from .batch import BatchRewardManager
17 | from .dapo import DAPORewardManager
18 | from .naive import NaiveRewardManager
19 | from .prime import PrimeRewardManager
20 | 
21 | # Note(haibin.lin): no need to include all reward managers here in case of complicated dependencies
22 | __all__ = [
23 |     "BatchRewardManager",
24 |     "DAPORewardManager",
25 |     "NaiveRewardManager",
26 |     "PrimeRewardManager",
27 |     "register",
28 |     "get_reward_manager_cls",
29 | ]
30 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Amazon.com Inc and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from abc import abstractmethod
15 | from collections.abc import Sized
16 | 
17 | from omegaconf import DictConfig
18 | from torch.utils.data import Sampler
19 | 
20 | from verl import DataProto
21 | 
22 | 
23 | class AbstractSampler(Sampler[int]):
24 |     @abstractmethod
25 |     def __init__(
26 |         self,
27 |         data_source: Sized,
28 |         config: DictConfig,
29 |     ):
30 |         pass
31 | 
32 | 
33 | class AbstractCurriculumSampler(AbstractSampler):
34 |     @abstractmethod
35 |     def update(self, batch: DataProto) -> None:
36 |         pass
37 | 


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | import torch
21 | 
22 | from verl import DataProto
23 | 
24 | __all__ = ["BasePPOCritic"]
25 | 
26 | 
27 | class BasePPOCritic(ABC):
28 |     def __init__(self, config):
29 |         super().__init__()
30 |         self.config = config
31 | 
32 |     @abstractmethod
33 |     def compute_values(self, data: DataProto) -> torch.Tensor:
34 |         """Compute values"""
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def update_critic(self, data: DataProto):
39 |         """Update the critic"""
40 |         pass
41 | 


--------------------------------------------------------------------------------
/verl/utils/profiler/empty_annotations.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Callable, Optional
16 | 
17 | 
18 | def mark_start_range(
19 |     message: Optional[str] = None,
20 |     color: Optional[str] = None,
21 |     domain: Optional[str] = None,
22 |     category: Optional[str] = None,
23 | ) -> None:
24 |     pass
25 | 
26 | 
27 | def mark_end_range(range_id: str) -> None:
28 |     pass
29 | 
30 | 
31 | def mark_annotate(
32 |     message: Optional[str] = None,
33 |     color: Optional[str] = None,
34 |     domain: Optional[str] = None,
35 |     category: Optional[str] = None,
36 | ) -> Callable:
37 |     def decorator(func):
38 |         return func
39 | 
40 |     return decorator
41 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_llama_megatron import (
16 |     ParallelLlamaForCausalLM,
17 |     # rmpad with megatron
18 |     ParallelLlamaForCausalLMRmPad,
19 |     # rmpad with megatron and pipeline parallelism
20 |     ParallelLlamaForCausalLMRmPadPP,
21 |     ParallelLlamaForValueRmPad,
22 |     ParallelLlamaForValueRmPadPP,
23 |     # original model with megatron
24 |     ParallelLlamaModel,
25 | )
26 | 
27 | __all__ = [
28 |     "ParallelLlamaForCausalLM",
29 |     "ParallelLlamaForCausalLMRmPad",
30 |     "ParallelLlamaForCausalLMRmPadPP",
31 |     "ParallelLlamaForValueRmPad",
32 |     "ParallelLlamaForValueRmPadPP",
33 |     "ParallelLlamaModel",
34 | ]
35 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_qwen2_megatron import (
16 |     ParallelQwen2ForCausalLM,
17 |     # rmpad with megatron
18 |     ParallelQwen2ForCausalLMRmPad,
19 |     # rmpad with megatron and pipeline parallelism
20 |     ParallelQwen2ForCausalLMRmPadPP,
21 |     ParallelQwen2ForValueRmPad,
22 |     ParallelQwen2ForValueRmPadPP,
23 |     # original model with megatron
24 |     ParallelQwen2Model,
25 | )
26 | 
27 | __all__ = [
28 |     "ParallelQwen2ForCausalLM",
29 |     "ParallelQwen2ForCausalLMRmPad",
30 |     "ParallelQwen2ForCausalLMRmPadPP",
31 |     "ParallelQwen2ForValueRmPad",
32 |     "ParallelQwen2ForValueRmPadPP",
33 |     "ParallelQwen2Model",
34 | ]
35 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelLlamaAttention
16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad
17 | from .parallel_linear import (
18 |     LinearForLastLayer,
19 |     MergedColumnParallelLinear,
20 |     QKVParallelLinear,
21 | )
22 | from .parallel_mlp import ParallelLlamaMLP
23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm
24 | 
25 | __all__ = [
26 |     "LinearForLastLayer",
27 |     "MergedColumnParallelLinear",
28 |     "QKVParallelLinear",
29 |     "ParallelLlamaAttention",
30 |     "ParallelLlamaDecoderLayer",
31 |     "ParallelLlamaDecoderLayerRmPad",
32 |     "ParallelLlamaMLP",
33 |     "ParallelLlamaRMSNorm",
34 | ]
35 | 


--------------------------------------------------------------------------------
/verl/third_party/sglang/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Licensed under the Apache License, Version 2.0 (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | #     http://www.apache.org/licenses/LICENSE-2.0
 7 | #
 8 | # Unless required by applicable law or agreed to in writing, software
 9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # ==============================================================================
14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
15 | #
16 | # Licensed under the Apache License, Version 2.0 (the "License");
17 | # you may not use this file except in compliance with the License.
18 | # You may obtain a copy of the License at
19 | #
20 | #     http://www.apache.org/licenses/LICENSE-2.0
21 | #
22 | # Unless required by applicable law or agreed to in writing, software
23 | # distributed under the License is distributed on an "AS IS" BASIS,
24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | # See the License for the specific language governing permissions and
26 | # limitations under the License.
27 | 


--------------------------------------------------------------------------------
/search/retrieval/build_index_vllm_api.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # vLLM HTTP API 版本的索引构建脚本
 4 | # 使用方法：
 5 | # 1. 先启动 vLLM 服务器
 6 | # 2. 再运行此脚本构建索引
 7 | 
 8 | # 配置参数
 9 | CORPUS_PATH="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl"         # 替换为你的语料库路径
10 | SAVE_DIR="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki"
11 | RETRIEVAL_METHOD=qwen                 # 或者 bge, contriever 等
12 | BATCH_SIZE=64                          # API 调用的批次大小
13 | VLLM_API_URL="http://localhost:8000"   # vLLM 服务器地址
14 | #EMBEDDING_PATH="/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18-e5/emb_e5.memmap" # 预计算的 embedding 文件路径
15 | corpus_file=/group/40077/shyuli/datasets/RL/hot_benchmark/wiki/hot-wiki-18.jsonl # jsonl
16 | # change faiss_type to HNSW32/64/128 for ANN indexing
17 | # change retriever_name to bm25 for BM25 indexing
18 | 
19 | #"vllm serve /group/40077/shyuli/models/embedding/e5-base-v2 --task embed --host 0.0.0.0 --port 8000 --data-parallel-size 2"
20 | 
21 | echo
22 | echo "开始构建索引..."
23 | 
24 | # 运行索引构建
25 | python index_builder_api.py \
26 |     --retrieval_method $RETRIEVAL_METHOD \
27 |     --corpus_path $CORPUS_PATH \
28 |     --save_dir $SAVE_DIR \
29 |     --batch_size $BATCH_SIZE \
30 |     --vllm_api_url $VLLM_API_URL \
31 |     --max_length 256 \
32 |     --save_embedding \
33 |     --faiss_type "Flat"
34 | 
35 | # --embedding_path $EMBEDDING_PATH \
36 | echo "索引构建完成！"


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | import ray
18 | 
19 | 
20 | @ray.remote
21 | class WorkerGroupRegisterCenter:
22 |     def __init__(self, rank_zero_info):
23 |         self.rank_zero_info = rank_zero_info
24 |         # rank -> node_id
25 |         self.workers_info: Dict[int, str] = {}
26 | 
27 |     def get_rank_zero_info(self):
28 |         return self.rank_zero_info
29 | 
30 |     def set_worker_info(self, rank, node_id) -> None:
31 |         self.workers_info[rank] = node_id
32 | 
33 |     def get_worker_info(self) -> Dict[int, str]:
34 |         return self.workers_info
35 | 
36 | 
37 | def create_worker_group_register_center(name, info):
38 |     return WorkerGroupRegisterCenter.options(name=name).remote(info)
39 | 


--------------------------------------------------------------------------------
/verl/utils/kernel/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
19 | #
20 | # Licensed under the Apache License, Version 2.0 (the "License");
21 | # you may not use this file except in compliance with the License.
22 | # You may obtain a copy of the License at
23 | #
24 | #     http://www.apache.org/licenses/LICENSE-2.0
25 | #
26 | # Unless required by applicable law or agreed to in writing, software
27 | # distributed under the License is distributed on an "AS IS" BASIS,
28 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29 | # See the License for the specific language governing permissions and
30 | # limitations under the License.
31 | 
32 | 


--------------------------------------------------------------------------------
/verl/utils/profiler/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from ..import_utils import is_nvtx_available
16 | from .performance import GPUMemoryLogger, log_gpu_memory_usage, simple_timer
17 | from .profile import DistProfilerExtension, ProfilerConfig
18 | 
19 | if is_nvtx_available():
20 |     from .nvtx_profile import NsightSystemsProfiler as DistProfiler
21 |     from .nvtx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer
22 | else:
23 |     from .performance import marked_timer
24 |     from .profile import DistProfiler, mark_annotate, mark_end_range, mark_start_range
25 | 
26 | __all__ = [
27 |     "GPUMemoryLogger",
28 |     "log_gpu_memory_usage",
29 |     "mark_start_range",
30 |     "mark_end_range",
31 |     "mark_annotate",
32 |     "DistProfiler",
33 |     "DistProfilerExtension",
34 |     "ProfilerConfig",
35 |     "simple_timer",
36 |     "marked_timer",
37 | ]
38 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/geo3k.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import re
15 | 
16 | from mathruler.grader import extract_boxed_content, grade_answer
17 | 
18 | 
19 | def format_reward(predict_str: str) -> float:
20 |     pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
21 |     match_result = re.fullmatch(pattern, predict_str)
22 |     return 1.0 if match_result else 0.0
23 | 
24 | 
25 | def acc_reward(predict_str: str, ground_truth: str, use_boxed: bool = True) -> float:
26 |     if use_boxed:
27 |         answer = extract_boxed_content(predict_str)
28 |     else:
29 |         answer = predict_str
30 |     return 1.0 if grade_answer(answer, ground_truth) else 0.0
31 | 
32 | 
33 | def compute_score(predict_str: str, ground_truth: str, use_boxed: bool = True, format_score: float = 0.1) -> float:
34 |     return (1.0 - format_score) * acc_reward(predict_str, ground_truth, use_boxed) + format_score * format_reward(
35 |         predict_str
36 |     )
37 | 


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for distributed training."""
15 | 
16 | import os
17 | 
18 | import torch.distributed
19 | 
20 | from verl.utils.device import get_nccl_backend, get_torch_device
21 | 
22 | 
23 | def initialize_global_process_group(timeout_second=36000):
24 |     from datetime import timedelta
25 | 
26 |     torch.distributed.init_process_group(
27 |         get_nccl_backend(),
28 |         timeout=timedelta(seconds=timeout_second),
29 |         init_method=os.environ.get("DIST_INIT_METHOD", None),
30 |     )
31 |     local_rank = int(os.environ["LOCAL_RANK"])
32 |     rank = int(os.environ["RANK"])
33 |     world_size = int(os.environ["WORLD_SIZE"])
34 | 
35 |     if torch.distributed.is_initialized():
36 |         get_torch_device().set_device(local_rank)
37 |     return local_rank, rank, world_size
38 | 
39 | 
40 | def destroy_global_process_group():
41 |     if torch.distributed.is_initialized():
42 |         torch.distributed.destroy_process_group()
43 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | from verl.utils.device import get_device_id
18 | 
19 | 
20 | class MemoryBuffer:
21 |     def __init__(self, numel, numel_padded, dtype):
22 |         self.numel = numel
23 |         self.numel_padded = numel_padded
24 |         self.dtype = dtype
25 |         self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=get_device_id(), requires_grad=False)
26 | 
27 |     def zero(self):
28 |         """Reset the buffer to zero."""
29 |         self.data.zero_()
30 | 
31 |     def get(self, shape, start_index):
32 |         """Return a tensor with the input `shape` as a view into the
33 |         1-D data starting at `start_index`."""
34 |         end_index = start_index + shape.numel()
35 |         assert end_index <= self.numel, "requested tensor is out of the buffer range."
36 |         buffer_tensor = self.data[start_index:end_index]
37 |         buffer_tensor = buffer_tensor.view(shape)
38 |         return buffer_tensor
39 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/math_verify.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | try:
16 |     from math_verify.errors import TimeoutException
17 |     from math_verify.metric import math_metric
18 |     from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig
19 | except ImportError:
20 |     print("To use Math-Verify, please install it first by running `pip install math-verify`.")
21 | 
22 | 
23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> bool:
24 |     verify_func = math_metric(
25 |         gold_extraction_target=(LatexExtractionConfig(),),
26 |         pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
27 |     )
28 |     ret_score = 0.0
29 | 
30 |     # Wrap the ground truth in \boxed{} format for verification
31 |     ground_truth_boxed = "\\boxed{" + ground_truth + "}"
32 |     try:
33 |         ret_score, _ = verify_func([ground_truth_boxed], [model_output])
34 |     except Exception:
35 |         pass
36 |     except TimeoutException:
37 |         ret_score = timeout_score
38 | 
39 |     return ret_score
40 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | from importlib.metadata import PackageNotFoundError, version
16 | 
17 | from .vllm_rollout_spmd import vLLMAsyncRollout, vLLMRollout  # noqa: F401
18 | 
19 | 
20 | def get_version(pkg):
21 |     try:
22 |         return version(pkg)
23 |     except PackageNotFoundError:
24 |         return None
25 | 
26 | 
27 | vllm_package_name = "vllm"
28 | vllm_package_version = get_version(vllm_package_name)
29 | if vllm_package_version is None:
30 |     raise PackageNotFoundError(
31 |         "To use vllm rollout, please ensure the 'vllm' package is properly installed. See "
32 |         "https://verl.readthedocs.io/en/latest/start/install.html for more details"
33 |     )
34 | 
35 | if "ROCM_PATH" in os.environ:
36 |     import re
37 | 
38 |     match = re.match(r"(\d+\.\d+\.?\d*)", vllm_package_version)
39 |     if match:
40 |         vllm_package_version = match.group(1)
41 |     else:
42 |         raise ValueError(f"Warning: Could not parse version format: {vllm_package_version}")
43 | 
44 | vllm_mode = "spmd"
45 | 


--------------------------------------------------------------------------------
/verl/workers/reward_manager/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | __all__ = ["register", "get_reward_manager_cls"]
16 | 
17 | REWARD_MANAGER_REGISTRY = {}
18 | 
19 | 
20 | def register(name):
21 |     """Decorator to register a reward manager class with a given name.
22 | 
23 |     Args:
24 |         name: `(str)`
25 |             The name of the reward manager.
26 |     """
27 | 
28 |     def decorator(cls):
29 |         if name in REWARD_MANAGER_REGISTRY and REWARD_MANAGER_REGISTRY[name] != cls:
30 |             raise ValueError(
31 |                 f"Reward manager {name} has already been registered: {REWARD_MANAGER_REGISTRY[name]} vs {cls}"
32 |             )
33 |         REWARD_MANAGER_REGISTRY[name] = cls
34 |         return cls
35 | 
36 |     return decorator
37 | 
38 | 
39 | def get_reward_manager_cls(name):
40 |     """Get the reward manager class with a given name.
41 | 
42 |     Args:
43 |         name: `(str)`
44 |             The name of the reward manager.
45 | 
46 |     Returns:
47 |         `(type)`: The reward manager class.
48 |     """
49 |     if name not in REWARD_MANAGER_REGISTRY:
50 |         raise ValueError(f"Unknown reward manager: {name}")
51 |     return REWARD_MANAGER_REGISTRY[name]
52 | 


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
 1 | trainer:
 2 |   nnodes: 1
 3 |   n_gpus_per_node: 8
 4 |   device: cuda
 5 | 
 6 | data:
 7 |   path: ~/data/rlhf/math/test.parquet
 8 |   prompt_key: prompt
 9 |   n_samples: 5
10 |   output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
11 |   batch_size: 128
12 | 
13 | model:
14 |   path: ~/models/Qwen2-7B-Instruct
15 |   external_lib: null
16 | rollout:
17 |   name: vllm
18 |   mode: sync # sync: LLM, async: AsyncLLM
19 |   temperature: 1.0
20 |   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
21 |   top_p: 0.7
22 |   prompt_length: 1536
23 |   response_length: 512
24 |   # for vllm rollout
25 |   dtype: bfloat16 # should align with FSDP
26 |   gpu_memory_utilization: 0.5
27 |   ignore_eos: False
28 |   enforce_eager: True
29 |   free_cache_engine: True
30 |   load_format: dummy_dtensor
31 |   tensor_model_parallel_size: 1
32 |   max_num_batched_tokens: 8192
33 |   max_model_len: null
34 |   max_num_seqs: 1024
35 |   log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
36 |   log_prob_micro_batch_size_per_gpu: 8
37 |   # for hf rollout
38 |   do_sample: True
39 |   disable_log_stats: True
40 |   enable_chunked_prefill: True
41 |   n: 1
42 |   # support logging rollout prob for debugging purpose
43 |   calculate_log_probs: False
44 | actor:
45 |   strategy: fsdp  # This is for backward-compatibility
46 |   ulysses_sequence_parallel_size: 1 # sp size
47 |   entropy_from_logits_with_chunking: False  # calculate entropy with chunking to reduce memory peak
48 |   entropy_checkpointing: False  # recompute entropy
49 |   fsdp_config:
50 |     fsdp_size: -1
51 |     forward_prefetch: False  # FSDP1 forward_prefetch configuration
52 | 
53 | ray_init:
54 |   num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.
55 |   timeline_json_file: null
56 | 


--------------------------------------------------------------------------------
/verl/models/README.md:
--------------------------------------------------------------------------------
 1 | # Models
 2 | Common modelzoo such as huggingface/transformers stuggles when using Pytorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized with packed inputs in verl. 
 3 | ## Adding a New Huggingface Model
 4 | ### Step 1: Copy the model file from HF to verl
 5 | - Add a new file under verl/models/hf
 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf
 7 | 
 8 | ### Step 2: Modify the model file to use packed inputs
 9 | - Remove all the code related to inference (kv cache)
10 | - Modify the inputs to include only
11 |     - input_ids (total_nnz,)
12 |     - cu_seqlens (total_nnz + 1,)
13 |     - max_seqlen_in_batch: int
14 | - Note that this requires using flash attention with causal mask.
15 | 
16 | ### Step 2.5: Add tests
17 | - Add a test to compare this version and the huggingface version
18 | - Following the infrastructure and add tests to tests/models/hf
19 | 
20 | ### Step 3: Add a function to apply tensor parallelism
21 | - Please follow
22 |     - https://pytorch.org/docs/stable/distributed.tensor.parallel.html
23 |     - https://pytorch.org/tutorials/intermediate/TP_tutorial.html
24 | - General comments
25 |     - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward.
26 | 
27 | ### Step 4: Add a function to apply data parallelism
28 | - Please use FSDP2 APIs
29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413
30 | 
31 | ### Step 5: Add a function to apply pipeline parallelism
32 | - Comes in Pytorch 2.4
33 | - Currently only in alpha in nightly version
34 | - Check torchtitan for more details
35 | 
36 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for reward model
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | from verl import DataProto
21 | 
22 | 
23 | class BasePPORewardModel(ABC):
24 |     def __init__(self, config):
25 |         self.config = config
26 | 
27 |     @abstractmethod
28 |     def compute_reward(self, data: DataProto) -> DataProto:
29 |         """Computing reward given input_ids. The transformers should output a tensor with shape
30 |            [batch_size, sequence_length], and the value at [EOS] mask should be gathered.
31 | 
32 |         Args:
33 |             data: must contain keys "input_ids", "attention_mask" and "position_ids".
34 |                 - input_ids: [batch_size, sequence_length]
35 |                 - attention_mask: [batch_size, sequence_length]
36 |                 - position_ids: [batch_size, sequence_length]
37 | 
38 |         Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
39 |             Other position should have zero reward. Note that this may change in the future if we use
40 |             dense reward. So, we leave the interface for general case.
41 |             - reward: [batch_size, sequence_length].
42 | 
43 |         """
44 |         pass
45 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | 
17 | import torch
18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
19 | from megatron.core import ModelParallelConfig
20 | from torch import nn
21 | from transformers import LlamaConfig
22 | 
23 | from verl.utils.megatron import sequence_parallel as sp_utils
24 | 
25 | 
26 | class ParallelLlamaRMSNorm(nn.Module):
27 |     def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig):
28 |         """
29 |         LlamaRMSNorm is equivalent to T5LayerNorm
30 |         """
31 |         super().__init__()
32 |         if isinstance(config.hidden_size, numbers.Integral):
33 |             normalized_shape = (config.hidden_size,)
34 |         self.normalized_shape = torch.Size(normalized_shape)
35 |         self.weight = nn.Parameter(torch.ones(self.normalized_shape))
36 |         self.variance_epsilon = config.rms_norm_eps
37 | 
38 |         if megatron_config.sequence_parallel:
39 |             sp_utils.mark_parameter_as_sequence_parallel(self.weight)
40 | 
41 |     def forward(self, hidden_states):
42 |         return fused_rms_norm_affine(
43 |             input=hidden_states,
44 |             weight=self.weight,
45 |             normalized_shape=self.normalized_shape,
46 |             eps=self.variance_epsilon,
47 |             memory_efficient=True,
48 |         )
49 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | 
17 | import torch
18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
19 | from megatron.core import ModelParallelConfig
20 | from torch import nn
21 | from transformers import Qwen2Config
22 | 
23 | from verl.utils.megatron import sequence_parallel as sp_utils
24 | 
25 | 
26 | class ParallelQwen2RMSNorm(nn.Module):
27 |     def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig):
28 |         """
29 |         Qwen2RMSNorm is equivalent to T5LayerNorm
30 |         """
31 |         super().__init__()
32 |         if isinstance(config.hidden_size, numbers.Integral):
33 |             normalized_shape = (config.hidden_size,)
34 |         self.normalized_shape = torch.Size(normalized_shape)
35 |         self.weight = nn.Parameter(torch.ones(self.normalized_shape))
36 |         self.variance_epsilon = config.rms_norm_eps
37 | 
38 |         if megatron_config.sequence_parallel:
39 |             sp_utils.mark_parameter_as_sequence_parallel(self.weight)
40 | 
41 |     def forward(self, hidden_states):
42 |         return fused_rms_norm_affine(
43 |             input=hidden_states,
44 |             weight=self.weight,
45 |             normalized_shape=self.normalized_shape,
46 |             eps=self.variance_epsilon,
47 |             memory_efficient=True,
48 |         )
49 | 


--------------------------------------------------------------------------------
/verl/utils/metric/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Metrics utils.
16 | """
17 | 
18 | from typing import Any, Dict, List
19 | 
20 | import numpy as np
21 | 
22 | 
23 | def reduce_metrics(metrics: Dict[str, List[Any]]) -> Dict[str, Any]:
24 |     """
25 |     Reduces a dictionary of metric lists by computing the mean, max, or min of each list.
26 |     The reduce operation is determined by the key name:
27 |     - If the key contains "max", np.max is used
28 |     - If the key contains "min", np.min is used
29 |     - Otherwise, np.mean is used
30 | 
31 |     Args:
32 |         metrics: A dictionary mapping metric names to lists of metric values.
33 | 
34 |     Returns:
35 |         A dictionary with the same keys but with each list replaced by its reduced value.
36 | 
37 |     Example:
38 |         >>> metrics = {
39 |         ...     "loss": [1.0, 2.0, 3.0],
40 |         ...     "accuracy": [0.8, 0.9, 0.7],
41 |         ...     "max_reward": [5.0, 8.0, 6.0],
42 |         ...     "min_error": [0.1, 0.05, 0.2]
43 |         ... }
44 |         >>> reduce_metrics(metrics)
45 |         {"loss": 2.0, "accuracy": 0.8, "max_reward": 8.0, "min_error": 0.05}
46 |     """
47 |     for key, val in metrics.items():
48 |         if "max" in key:
49 |             metrics[key] = np.max(val)
50 |         elif "min" in key:
51 |             metrics[key] = np.min(val)
52 |         else:
53 |             metrics[key] = np.mean(val)
54 |     return metrics
55 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/sequence_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | import torch.nn.functional as F
18 | from megatron.core import parallel_state as mpu
19 | 
20 | 
21 | def mark_parameter_as_sequence_parallel(parameter):
22 |     parameter.sequence_parallel = True
23 | 
24 | 
25 | def is_sequence_parallel_param(param):
26 |     return hasattr(param, "sequence_parallel") and param.sequence_parallel
27 | 
28 | 
29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
30 |     """pad the tokens such that the total length is a multiple of sp world size
31 | 
32 |     Args:
33 |         unpad_tokens: (total_nnz, ...). Tokens after removing padding
34 | 
35 |     Returns:
36 |         the padded tokens: (total_nnz + pad_size,...)
37 | 
38 |     """
39 |     total_nnz = unpad_tokens.shape[0]
40 |     sp_world_size = mpu.get_tensor_model_parallel_world_size()
41 | 
42 |     pad_size = 0 if total_nnz % sp_world_size == 0 else sp_world_size - total_nnz % sp_world_size
43 | 
44 |     if pad_size > 0:
45 |         if unpad_tokens.ndim == 1:
46 |             unpad_tokens = F.pad(unpad_tokens, (0, pad_size))
47 |         elif unpad_tokens.ndim == 2:
48 |             unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size))
49 |         else:
50 |             raise NotImplementedError(f"Padding dim {unpad_tokens.ndim()} is not supported")
51 | 
52 |     return unpad_tokens
53 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from importlib.metadata import PackageNotFoundError, version
16 | 
17 | from packaging import version as vs
18 | 
19 | from verl.utils.import_utils import is_sglang_available
20 | 
21 | 
22 | def get_version(pkg):
23 |     try:
24 |         return version(pkg)
25 |     except PackageNotFoundError:
26 |         return None
27 | 
28 | 
29 | package_name = "vllm"
30 | package_version = get_version(package_name)
31 | vllm_version = None
32 | 
33 | if package_version is None:
34 |     if not is_sglang_available():
35 |         raise ValueError(
36 |             f"vllm version {package_version} not supported and SGLang also not Found. Currently supported "
37 |             f"vllm versions are 0.7.0+"
38 |         )
39 | elif vs.parse(package_version) >= vs.parse("0.7.0"):
40 |     vllm_version = package_version
41 |     from vllm import LLM
42 |     from vllm.distributed import parallel_state
43 | else:
44 |     if vs.parse(package_version) in [vs.parse("0.5.4"), vs.parse("0.6.3")]:
45 |         raise ValueError(
46 |             f"vLLM version {package_version} support has been removed. vLLM 0.5.4 and 0.6.3 are no longer "
47 |             f"supported. Please use vLLM 0.7.0 or later."
48 |         )
49 |     if not is_sglang_available():
50 |         raise ValueError(
51 |             f"vllm version {package_version} not supported and SGLang also not Found. Currently supported "
52 |             f"vllm versions are 0.7.0+"
53 |         )
54 | 
55 | __all__ = ["LLM", "parallel_state"]
56 | 


--------------------------------------------------------------------------------
/verl/tools/utils/mcp_clients/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import threading
17 | import time
18 | 
19 | from mcp import Tool
20 | 
21 | logger = logging.getLogger(__file__)
22 | 
23 | 
24 | class TokenBucket:
25 |     def __init__(self, rate_limit: float):
26 |         self.rate_limit = rate_limit  # tokens per second
27 |         self.tokens = rate_limit
28 |         self.last_update = time.time()
29 |         self.lock = threading.Lock()
30 | 
31 |     def acquire(self) -> bool:
32 |         with self.lock:
33 |             now = time.time()
34 |             # Add new tokens based on time elapsed
35 |             new_tokens = (now - self.last_update) * self.rate_limit
36 |             self.tokens = min(self.rate_limit, self.tokens + new_tokens)
37 |             self.last_update = now
38 | 
39 |             if self.tokens >= 1:
40 |                 self.tokens -= 1
41 |                 return True
42 |             return False
43 | 
44 | 
45 | def mcp2openai(mcp_tool: Tool) -> dict:
46 |     """Convert a MCP Tool to an OpenAI ChatCompletionTool."""
47 |     openai_format = {
48 |         "type": "function",
49 |         "function": {
50 |             "name": mcp_tool.name,
51 |             "description": mcp_tool.description,
52 |             "parameters": mcp_tool.inputSchema,
53 |             "strict": False,
54 |         },
55 |     }
56 |     if not openai_format["function"]["parameters"].get("required", None):
57 |         openai_format["function"]["parameters"]["required"] = []
58 |     return openai_format
59 | 


--------------------------------------------------------------------------------
/verl/utils/profiler/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass, field
16 | 
17 | from verl.base_config import BaseConfig
18 | 
19 | 
20 | @dataclass(frozen=True)
21 | class ProfilerConfig(BaseConfig):
22 |     """Worker profiler config. Currently only support Nsight system profiler."""
23 | 
24 |     # True for each task has its own database, False for all tasks in one training step share one database.
25 |     discrete: bool = False
26 | 
27 |     # Whether to profile all ranks.
28 |     all_ranks: bool = False
29 | 
30 |     # The ranks that will be profiled. [] or [0,1,...]
31 |     ranks: list[int] = field(default_factory=list)
32 | 
33 |     def union(self, other: "ProfilerConfig") -> "ProfilerConfig":
34 |         return ProfilerConfig(
35 |             all_ranks=self.all_ranks or other.all_ranks,
36 |             ranks=list(set(self.ranks or []) | set(other.ranks or [])),
37 |             discrete=self.discrete or other.discrete,
38 |         )
39 | 
40 |     def intersect(self, other: "ProfilerConfig") -> "ProfilerConfig":
41 |         return ProfilerConfig(
42 |             all_ranks=self.all_ranks and other.all_ranks,
43 |             ranks=list(set(self.ranks or []) & set(other.ranks or [])),
44 |             discrete=self.discrete and other.discrete,
45 |         )
46 | 
47 |     def __post_init__(self) -> None:
48 |         """config validation logics go here"""
49 |         assert isinstance(self.ranks, (set, list, tuple)), (
50 |             f"Profiler ranks must be of type list, got {type(self.ranks)}"
51 |         )
52 | 


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   train_batch_size: 256
 3 |   micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
 4 |   micro_batch_size_per_gpu: 4  # this is also val batch size
 5 |   train_files: ~/data/gsm8k/train.parquet
 6 |   val_files: ~/data/gsm8k/test.parquet
 7 |   # Single-turn settings
 8 |   prompt_key: question
 9 |   response_key: answer
10 |   prompt_dict_keys: null
11 |   response_dict_keys: null
12 |   # Multi-turn settings
13 |   multiturn:
14 |     enable: false  # Set to true to use multi-turn dataset
15 |     messages_key: messages  # Key for messages list in multi-turn mode
16 |     tools_key: tools  # Key for tools list in multi-turn mode
17 |     enable_thinking_key: enable_thinking  # Whether to enable thinking in multi-turn mode
18 |   max_length: 1024
19 |   truncation: error
20 |   balance_dp_token: False
21 |   chat_template: null
22 |   custom_cls:
23 |     path: null
24 |     name: null
25 |   use_shm: False
26 | model:
27 |   partial_pretrain: ~/models/gemma-1.1-7b-it
28 |   use_shm: False
29 |   fsdp_config:
30 |     model_dtype: fp32
31 |     wrap_policy:
32 |       min_num_params: 0
33 |     cpu_offload: False
34 |     offload_params: False
35 |   external_lib: null
36 |   enable_gradient_checkpointing: True
37 |   trust_remote_code: False
38 |   lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
39 |   lora_alpha: 16  # LoRA scaling factor
40 |   target_modules: all-linear  # Target modules for LoRA adaptation
41 |   use_liger: False
42 |   strategy: fsdp2
43 | optim:
44 |   lr: 1e-5
45 |   betas: [0.9, 0.95]
46 |   weight_decay: 0.01
47 |   warmup_steps_ratio: 0.1
48 |   clip_grad: 1.0
49 |   lr_scheduler: cosine
50 | ulysses_sequence_parallel_size: 1
51 | use_remove_padding: False
52 | trainer:
53 |   default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
54 |   default_hdfs_dir: null
55 |   resume_path: null
56 |   project_name: gsm8k-sft
57 |   experiment_name: test
58 |   total_epochs: 4
59 |   total_training_steps: null
60 |   logger: [ 'console', 'wandb' ]
61 |   seed: 1
62 | 
63 |   save_freq: -1
64 |   test_freq: -1
65 |   nnodes: 1
66 |   n_gpus_per_node: 8
67 |   max_ckpt_to_keep: null # TODO
68 | 


--------------------------------------------------------------------------------
/verl/models/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import importlib
16 | from typing import List, Optional, Type
17 | 
18 | import torch.nn as nn
19 | 
20 | # Supported models in Megatron-LM
21 | # Architecture -> (module, class).
22 | _MODELS = {
23 |     "LlamaForCausalLM": (
24 |         "llama",
25 |         ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad"),
26 |     ),
27 |     "Qwen2ForCausalLM": (
28 |         "qwen2",
29 |         ("ParallelQwen2ForCausalLMRmPadPP", "ParallelQwen2ForValueRmPadPP", "ParallelQwen2ForCausalLMRmPad"),
30 |     ),
31 |     "MistralForCausalLM": (
32 |         "mistral",
33 |         ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", "ParallelMistralForCausalLMRmPad"),
34 |     ),
35 | }
36 | 
37 | 
38 | # return model class
39 | class ModelRegistry:
40 |     @staticmethod
41 |     def load_model_cls(model_arch: str, value=False) -> Optional[Type[nn.Module]]:
42 |         if model_arch not in _MODELS:
43 |             return None
44 | 
45 |         megatron = "megatron"
46 | 
47 |         module_name, model_cls_name = _MODELS[model_arch]
48 |         if not value:  # actor/ref
49 |             model_cls_name = model_cls_name[0]
50 |         elif value:  # critic/rm
51 |             model_cls_name = model_cls_name[1]
52 | 
53 |         module = importlib.import_module(f"verl.models.{module_name}.{megatron}.modeling_{module_name}_megatron")
54 |         return getattr(module, model_cls_name, None)
55 | 
56 |     @staticmethod
57 |     def get_supported_archs() -> List[str]:
58 |         return list(_MODELS.keys())
59 | 


--------------------------------------------------------------------------------
/verl/utils/net_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Licensed under the Apache License, Version 2.0 (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | #     http://www.apache.org/licenses/LICENSE-2.0
 7 | #
 8 | # Unless required by applicable law or agreed to in writing, software
 9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # ==============================================================================
14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
15 | #
16 | # Licensed under the Apache License, Version 2.0 (the "License");
17 | # you may not use this file except in compliance with the License.
18 | # You may obtain a copy of the License at
19 | #
20 | #     http://www.apache.org/licenses/LICENSE-2.0
21 | #
22 | # Unless required by applicable law or agreed to in writing, software
23 | # distributed under the License is distributed on an "AS IS" BASIS,
24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | # See the License for the specific language governing permissions and
26 | # limitations under the License.
27 | import ipaddress
28 | 
29 | 
30 | def is_ipv4(ip_str: str) -> bool:
31 |     """
32 |     Check if the given string is an IPv4 address
33 | 
34 |     Args:
35 |         ip_str: The IP address string to check
36 | 
37 |     Returns:
38 |         bool: Returns True if it's an IPv4 address, False otherwise
39 |     """
40 |     try:
41 |         ipaddress.IPv4Address(ip_str)
42 |         return True
43 |     except ipaddress.AddressValueError:
44 |         return False
45 | 
46 | 
47 | def is_ipv6(ip_str: str) -> bool:
48 |     """
49 |     Check if the given string is an IPv6 address
50 | 
51 |     Args:
52 |         ip_str: The IP address string to check
53 | 
54 |     Returns:
55 |         bool: Returns True if it's an IPv6 address, False otherwise
56 |     """
57 |     try:
58 |         ipaddress.IPv6Address(ip_str)
59 |         return True
60 |     except ipaddress.AddressValueError:
61 |         return False
62 | 


--------------------------------------------------------------------------------
/verl/models/transformers/npu_patch.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Copyright 2025 The Qwen Team and The HuggingFace Inc. team
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from typing import Tuple
18 | 
19 | import torch
20 | import torch_npu
21 | from torch_npu import npu_rotary_mul as apply_rotary_emb
22 | from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl
23 | from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2RMSNorm
24 | 
25 | 
26 | # This patch takes effect when using apply_rotary_pos_emb_flashatt on qwen2_5_vl and will be removed in
27 | # subsequent versions
28 | # https://github.com/huggingface/transformers/pull/38491
29 | def apply_rotary_pos_emb_flashatt_npu(
30 |     q: torch.Tensor, k: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor
31 | ) -> Tuple[torch.Tensor, torch.Tensor]:
32 |     cos = cos.chunk(2, dim=-1)[0].contiguous()
33 |     sin = sin.chunk(2, dim=-1)[0].contiguous()
34 |     cos = cos.repeat(1, 2)
35 |     sin = sin.repeat(1, 2)
36 |     q_embed = apply_rotary_emb(
37 |         q.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
38 |     ).type_as(q)
39 |     k_embed = apply_rotary_emb(
40 |         k.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
41 |     ).type_as(k)
42 |     return q_embed, k_embed
43 | 
44 | 
45 | # This api can improve performance on ASCEND NPU
46 | def rms_norm_forward(self, x):
47 |     return torch_npu.npu_rms_norm(x, self.weight, epsilon=self.variance_epsilon)[0]
48 | 
49 | 
50 | Qwen2RMSNorm.forward = rms_norm_forward
51 | modeling_qwen2_5_vl.apply_rotary_pos_emb_flashatt = apply_rotary_pos_emb_flashatt_npu
52 | 


--------------------------------------------------------------------------------
/verl/model_merger/__main__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | This module is used to merge huggingface model and test verl checkpoints from FSDP and Megatron backends.
17 | 
18 | To merge FSDP checkpoints:
19 | ```sh
20 | python -m verl.model_merger merge \
21 |     --backend fsdp \
22 |     --local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \
23 |     --target_dir /path/to/merged_hf_model
24 | ```
25 | 
26 | To merge Megatron checkpoints:
27 | ```sh
28 | python -m verl.model_merger merge \
29 |     --backend megatron \
30 |     --tie-word-embedding \
31 |     --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \
32 |     --target_dir /path/to/merged_hf_model
33 | ```
34 | 
35 | For more details, please refer to documentation:
36 | https://verl.readthedocs.io/en/latest/advance/checkpoint.html#convert-fsdp-and-megatron-checkpoints-to-huggingface-format-model
37 | """
38 | 
39 | from .base_model_merger import generate_config_from_args, parse_args
40 | 
41 | 
42 | def main():
43 |     args = parse_args()
44 |     config = generate_config_from_args(args)
45 |     print(f"config: {config}")
46 | 
47 |     if config.backend == "fsdp":
48 |         from .fsdp_model_merger import FSDPModelMerger
49 | 
50 |         merger = FSDPModelMerger(config)
51 |     elif config.backend == "megatron":
52 |         from .megatron_model_merger import MegatronModelMerger
53 | 
54 |         merger = MegatronModelMerger(config)
55 |     else:
56 |         raise NotImplementedError(f"Unknown backend: {config.backend}")
57 | 
58 |     merger.merge_and_save()
59 |     merger.cleanup()
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     main()
64 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/dist_checkpointing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from megatron.core import dist_checkpointing, mpu
16 | from megatron.core.dist_checkpointing.serialization import (
17 |     get_default_load_sharded_strategy,
18 |     get_default_save_sharded_strategy,
19 | )
20 | from megatron.core.dist_checkpointing.strategies.fully_parallel import (
21 |     FullyParallelLoadStrategyWrapper,
22 |     FullyParallelSaveStrategyWrapper,
23 | )
24 | 
25 | 
26 | def save_dist_checkpointing(sharded_state_dict, ckpt_path, async_save=False):
27 |     validate_sharding_integrity = True
28 |     # Get checkpointing strategies
29 |     save_strategy = get_default_save_sharded_strategy("torch_dist")
30 |     save_strategy = FullyParallelSaveStrategyWrapper(
31 |         save_strategy, mpu.get_data_parallel_group(with_context_parallel=True)
32 |     )
33 | 
34 |     # Save model sharded state dicts
35 |     async_save_request = dist_checkpointing.save(
36 |         sharded_state_dict,
37 |         ckpt_path,
38 |         sharded_strategy=save_strategy,
39 |         async_sharded_save=async_save,
40 |         validate_access_integrity=validate_sharding_integrity,
41 |     )
42 | 
43 |     return async_save_request
44 | 
45 | 
46 | def load_dist_checkpointing(sharded_state_dict, ckpt_dir):
47 |     # Get checkpointing strategies
48 |     load_strategy = get_default_load_sharded_strategy(ckpt_dir)
49 |     load_strategy = FullyParallelLoadStrategyWrapper(
50 |         load_strategy, mpu.get_data_parallel_group(with_context_parallel=True)
51 |     )
52 | 
53 |     # Load model sharded state dicts
54 |     state_dict = dist_checkpointing.load(sharded_state_dict, ckpt_dir, sharded_strategy=load_strategy)
55 | 
56 |     return state_dict
57 | 


--------------------------------------------------------------------------------
/verl/workers/actor/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for Actor
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | from typing import Dict
20 | 
21 | import torch
22 | 
23 | from verl import DataProto
24 | 
25 | __all__ = ["BasePPOActor"]
26 | 
27 | 
28 | class BasePPOActor(ABC):
29 |     def __init__(self, config):
30 |         """The base class for PPO actor
31 | 
32 |         Args:
33 |             config (DictConfig): a config passed to the PPOActor. We expect the type to be
34 |                 DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general.
35 |         """
36 |         super().__init__()
37 |         self.config = config
38 | 
39 |     @abstractmethod
40 |     def compute_log_prob(self, data: DataProto) -> torch.Tensor:
41 |         """Compute logits given a batch of data.
42 | 
43 |         Args:
44 |             data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```,
45 |                 ```attention_mask``` and ```position_ids```.
46 | 
47 |         Returns:
48 |             DataProto: a DataProto containing the key ```log_probs```
49 | 
50 | 
51 |         """
52 |         pass
53 | 
54 |     @abstractmethod
55 |     def update_policy(self, data: DataProto) -> Dict:
56 |         """Update the policy with an iterator of DataProto
57 | 
58 |         Args:
59 |             data (DataProto): an iterator over the DataProto that returns by
60 |                 ```make_minibatch_iterator```
61 | 
62 |         Returns:
63 |             Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model
64 |             such as ```loss```, ```grad_norm```, etc,.
65 | 
66 |         """
67 |         pass
68 | 


--------------------------------------------------------------------------------
/search/retrieval/read_npz_simple.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Simple NPZ file reader
 4 | 
 5 | Used to read a single NPZ file and view its contents
 6 | """
 7 | 
 8 | import numpy as np
 9 | import argparse
10 | 
11 | 
12 | def read_npz_file(npz_path):
13 |     """
14 |     Read NPZ file and display information
15 | 
16 |     Args:
17 |         npz_path: NPZ file path
18 |     """
19 |     print(f"Reading file: {npz_path}")
20 | 
21 |     data = np.load(npz_path)
22 | 
23 |     print(f"Keys in file: {list(data.keys())}")
24 | 
25 |     if "embeddings" in data:
26 |         embeddings = data["embeddings"]
27 | 
28 |         print(f"\nEmbeddings info:")
29 |         print(f"  Shape: {embeddings.shape}")
30 |         print(f"  Data type: {embeddings.dtype}")
31 |         print(f"  Memory usage: {embeddings.nbytes / (1024*1024):.1f} MB")
32 | 
33 |         if len(embeddings) > 0:
34 |             norms = np.linalg.norm(embeddings, axis=1)
35 |             means = np.mean(embeddings, axis=1)
36 | 
37 |             print(f"  L2 norm - mean: {np.mean(norms):.6f}, std: {np.std(norms):.6f}")
38 |             print(f"  L2 norm - min: {np.min(norms):.6f}, max: {np.max(norms):.6f}")
39 |             print(f"  Vector mean - mean: {np.mean(means):.6f}, std: {np.std(means):.6f}")
40 | 
41 |             n_show = min(3, len(embeddings))
42 |             print(f"\nFirst {n_show} vectors:")
43 |             for i in range(n_show):
44 |                 print(f"  Vector {i}: norm={np.linalg.norm(embeddings[i]):.6f}, first 5 values={embeddings[i][:5]}")
45 |     else:
46 |         print("'embeddings' key not found in file")
47 | 
48 |         for key in data.keys():
49 |             arr = data[key]
50 |             print(f"\nKey '{key}':")
51 |             print(f"  Shape: {arr.shape}")
52 |             print(f"  Data type: {arr.dtype}")
53 |             if arr.size <= 10:
54 |                 print(f"  Values: {arr}")
55 |             else:
56 |                 print(f"  First 10 values: {arr.flat[:10]}")
57 | 
58 | 
59 | def main():
60 |     parser = argparse.ArgumentParser(description="Read NPZ file")
61 |     parser.add_argument("npz_file", help="NPZ file path")
62 | 
63 |     args = parser.parse_args()
64 | 
65 |     try:
66 |         read_npz_file(args.npz_file)
67 |     except Exception as e:
68 |         print(f"Error: {e}")
69 |         return 1
70 | 
71 |     return 0
72 | 
73 | 
74 | if __name__ == "__main__":
75 |     exit(main())
76 | 


--------------------------------------------------------------------------------
/verl/experimental/agent_loop/single_turn_agent_loop.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import logging
15 | import os
16 | from typing import Any, Dict, List
17 | from uuid import uuid4
18 | 
19 | from verl.experimental.agent_loop.agent_loop import AgentLoopBase, AgentLoopOutput
20 | from verl.utils.profiler import simple_timer
21 | 
22 | logger = logging.getLogger(__file__)
23 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))
24 | 
25 | 
26 | class SingleTurnAgentLoop(AgentLoopBase):
27 |     """Naive agent loop that only do single turn chat completion."""
28 | 
29 |     def __init__(self, config, server_manager, tokenizer):
30 |         super().__init__(config, server_manager, tokenizer)
31 |         self.prompt_length = config.actor_rollout_ref.rollout.prompt_length
32 |         self.response_length = config.actor_rollout_ref.rollout.response_length
33 | 
34 |     async def run(self, messages: List[Dict[str, Any]], sampling_params: Dict[str, Any]) -> AgentLoopOutput:
35 |         metrics = {}
36 |         request_id = uuid4().hex
37 |         prompt_ids = await self.loop.run_in_executor(
38 |             None, lambda: self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True)
39 |         )
40 | 
41 |         with simple_timer("generate_sequences", metrics):
42 |             response_ids = await self.server_manager.generate(
43 |                 request_id=request_id, prompt_ids=prompt_ids, sampling_params=sampling_params
44 |             )
45 |         response_mask = [1] * len(response_ids)
46 | 
47 |         output = AgentLoopOutput(
48 |             prompt_ids=prompt_ids,
49 |             response_ids=response_ids[: self.response_length],
50 |             response_mask=response_mask[: self.response_length],
51 |             num_turns=2,
52 |             metrics=metrics,
53 |         )
54 |         return output
55 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/prime_code/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 PRIME team and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Borrowed from: https://huggingface.co/spaces/codeparrot/apps_metric/blob/main/utils.py
16 | 
17 | import multiprocessing
18 | import os
19 | import sys
20 | import traceback
21 | from typing import Optional
22 | 
23 | from .testing_util import run_test
24 | 
25 | 
26 | def _temp_run(sample, generation, debug, result, metadata_list, timeout):
27 |     with open(os.devnull, "w") as devnull:
28 |         sys.stdout = devnull
29 |         sys.stderr = devnull
30 |         try:
31 |             res, metadata = run_test(in_outs=sample, test=generation, debug=debug, timeout=timeout)
32 |             result.append(res)
33 |             metadata_list.append(metadata)
34 |         except Exception:
35 |             # print(e) # some tracebacks are extremely long.
36 |             traceback.print_exc(10)
37 |             result.append([-1 for i in range(len(sample["inputs"]))])
38 |             metadata_list.append({})
39 | 
40 | 
41 | def check_correctness(in_outs: Optional[dict], generation, timeout=10, debug=True):
42 |     """Check correctness of code generation with a global timeout.
43 |     The global timeout is to catch some extreme/rare cases not handled by the timeouts
44 |     inside `run_test`"""
45 | 
46 |     manager = multiprocessing.Manager()
47 |     result = manager.list()
48 |     metadata_list = manager.list()
49 |     p = multiprocessing.Process(target=_temp_run, args=(in_outs, generation, debug, result, metadata_list, timeout))
50 |     p.start()
51 |     p.join(timeout=timeout + 1)
52 |     if p.is_alive():
53 |         p.kill()
54 |         # p.terminate()
55 |     if not result:
56 |         # consider that all tests failed
57 |         result = [[-1 for i in range(len(in_outs["inputs"]))]]
58 |         if debug:
59 |             print("global timeout")
60 |     return result[0], metadata_list
61 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker_group.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from verl.single_controller.base import ResourcePool, WorkerGroup
18 | 
19 | from .worker import DistGlobalInfo, DistRankInfo
20 | 
21 | 
22 | class MegatronWorkerGroup(WorkerGroup):
23 |     def __init__(self, resource_pool: ResourcePool, **kwargs):
24 |         super().__init__(resource_pool=resource_pool, **kwargs)
25 |         self._megatron_rank_info = None
26 |         self._megatron_global_info: DistGlobalInfo = None
27 | 
28 |     def init_megatron(self, default_megatron_kwargs: Dict = None):
29 |         raise NotImplementedError("MegatronWorkerGroup.init_megatron should be overwritten")
30 | 
31 |     def get_megatron_rank_info(self, rank: int) -> DistRankInfo:
32 |         assert 0 <= rank < self.world_size, f"rank must be from [0, world_size), Got {rank}"
33 |         return self._megatron_rank_info[rank]
34 | 
35 |     @property
36 |     def tp_size(self):
37 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
38 |         return self._megatron_global_info.tp_size
39 | 
40 |     @property
41 |     def dp_size(self):
42 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
43 |         return self._megatron_global_info.dp_size
44 | 
45 |     @property
46 |     def pp_size(self):
47 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
48 |         return self._megatron_global_info.pp_size
49 | 
50 |     @property
51 |     def cp_size(self):
52 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
53 |         return self._megatron_global_info.cp_size
54 | 
55 |     def get_megatron_global_info(self):
56 |         return self._megatron_global_info
57 | 


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import os
17 | 
18 | import pkg_resources
19 | from packaging.version import parse as parse_version
20 | from pkg_resources import DistributionNotFound
21 | 
22 | from .protocol import DataProto
23 | from .utils.device import is_npu_available
24 | from .utils.logging_utils import set_basic_config
25 | 
26 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
27 | 
28 | with open(os.path.join(version_folder, "version/version")) as f:
29 |     __version__ = f.read().strip()
30 | 
31 | 
32 | set_basic_config(level=logging.WARNING)
33 | 
34 | 
35 | __all__ = ["DataProto", "__version__"]
36 | 
37 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true":
38 |     import importlib
39 | 
40 |     if importlib.util.find_spec("modelscope") is None:
41 |         raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`")
42 |     # Patch hub to download models from modelscope to speed up.
43 |     from modelscope.utils.hf_util import patch_hub
44 | 
45 |     patch_hub()
46 | 
47 | if is_npu_available:
48 |     from .models.transformers import npu_patch as npu_patch
49 | 
50 |     package_name = "transformers"
51 |     required_version_spec = "4.52.4"
52 |     try:
53 |         installed_version = pkg_resources.get_distribution(package_name).version
54 |         installed = parse_version(installed_version)
55 |         required = parse_version(required_version_spec)
56 | 
57 |         if not installed >= required:
58 |             raise ValueError(
59 |                 f"{package_name} version >= {required_version_spec} is required on ASCEND NPU, current version is "
60 |                 f"{installed}."
61 |             )
62 |     except DistributionNotFound as e:
63 |         raise ImportError(
64 |             f"package {package_name} is not installed, please run pip install {package_name}=={required_version_spec}"
65 |         ) from e
66 | 


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import is_dataclass
16 | from typing import Any, Dict, Optional, Type, Union
17 | 
18 | from omegaconf import DictConfig, OmegaConf
19 | 
20 | __all__ = ["omega_conf_to_dataclass"]
21 | 
22 | 
23 | def omega_conf_to_dataclass(config: Union[DictConfig, dict], dataclass_type: Optional[Type[Any]] = None) -> Any:
24 |     """
25 |     Convert an OmegaConf DictConfig to a dataclass.
26 | 
27 |     Args:
28 |         config: The OmegaConf DictConfig or dict to convert.
29 |         dataclass_type: The dataclass type to convert to. When dataclass_type is None,
30 |             the DictConfig must contain _target_ to be instantiated via hydra.instantiate API.
31 | 
32 |     Returns:
33 |         The dataclass instance.
34 |     """
35 |     if dataclass_type is not None and isinstance(config, dataclass_type):
36 |         return config
37 | 
38 |     if dataclass_type is None:
39 |         assert "_target_" in config, (
40 |             "When dataclass_type is not provided, config must contain _target_."
41 |             "See trainer/config/ppo_trainer.yaml algorithm section for an example."
42 |         )
43 |         from hydra.utils import instantiate
44 | 
45 |         return instantiate(config, _convert_="partial")
46 | 
47 |     if not is_dataclass(dataclass_type):
48 |         raise ValueError(f"{dataclass_type} must be a dataclass")
49 |     cfg = OmegaConf.create(config)  # in case it's a dict
50 |     cfg_from_dataclass = OmegaConf.structured(dataclass_type)
51 |     # let cfg override the existing vals in `cfg_from_dataclass`
52 |     cfg_merged = OmegaConf.merge(cfg_from_dataclass, cfg)
53 |     # now convert to `dataclass_type`
54 |     config_object = OmegaConf.to_object(cfg_merged)
55 |     return config_object
56 | 
57 | 
58 | def update_dict_with_config(dictionary: Dict, config: DictConfig):
59 |     for key in dictionary:
60 |         if hasattr(config, key):
61 |             dictionary[key] = getattr(config, key)
62 | 


--------------------------------------------------------------------------------
/verl/utils/torch_dtypes.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Adapted from Cruise.
16 | """
17 | 
18 | from typing import Union
19 | 
20 | import torch
21 | 
22 | HALF_LIST = [16, "16", "fp16", "float16", torch.float16]
23 | FLOAT_LIST = [32, "32", "fp32", "float32", torch.float32]
24 | BFLOAT_LIST = ["bf16", "bfloat16", torch.bfloat16]
25 | 
26 | 
27 | class PrecisionType:
28 |     """Type of precision used.
29 | 
30 |     >>> PrecisionType.HALF == 16
31 |     True
32 |     >>> PrecisionType.HALF in (16, "16")
33 |     True
34 |     """
35 | 
36 |     HALF = "16"
37 |     FLOAT = "32"
38 |     FULL = "64"
39 |     BFLOAT = "bf16"
40 |     MIXED = "mixed"
41 | 
42 |     @staticmethod
43 |     def supported_type(precision: Union[str, int]) -> bool:
44 |         return any(x == precision for x in PrecisionType)
45 | 
46 |     @staticmethod
47 |     def supported_types() -> list[str]:
48 |         return [x.value for x in PrecisionType]
49 | 
50 |     @staticmethod
51 |     def is_fp16(precision):
52 |         return precision in HALF_LIST
53 | 
54 |     @staticmethod
55 |     def is_fp32(precision):
56 |         return precision in FLOAT_LIST
57 | 
58 |     @staticmethod
59 |     def is_bf16(precision):
60 |         return precision in BFLOAT_LIST
61 | 
62 |     @staticmethod
63 |     def to_dtype(precision):
64 |         if precision in HALF_LIST:
65 |             return torch.float16
66 |         elif precision in FLOAT_LIST:
67 |             return torch.float32
68 |         elif precision in BFLOAT_LIST:
69 |             return torch.bfloat16
70 |         else:
71 |             raise RuntimeError(f"unexpected precision: {precision}")
72 | 
73 |     @staticmethod
74 |     def to_str(precision):
75 |         if precision == torch.float16:
76 |             return "fp16"
77 |         elif precision == torch.float32:
78 |             return "fp32"
79 |         elif precision == torch.bfloat16:
80 |             return "bf16"
81 |         else:
82 |             raise RuntimeError(f"unexpected precision: {precision}")
83 | 


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def get_weight_loader(arch: str):
17 |     from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel
18 | 
19 |     _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {
20 |         "LlamaForCausalLM": load_state_dict_to_megatron_gptmodel,
21 |         "Qwen2ForCausalLM": load_state_dict_to_megatron_gptmodel,
22 |     }
23 | 
24 |     if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY:
25 |         return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch]
26 |     raise ValueError(
27 |         f"Model architectures {arch} loader are not supported for now. Supported architectures: "
28 |         f"{_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}"
29 |     )
30 | 
31 | 
32 | def get_weight_saver(arch: str):
33 |     from verl.models.mcore.saver import (
34 |         merge_megatron_ckpt_gptmodel,
35 |         merge_megatron_ckpt_gptmodel_dpskv3,
36 |         merge_megatron_ckpt_gptmodel_mixtral,
37 |         merge_megatron_ckpt_gptmodel_qwen2_5_vl,
38 |         merge_megatron_ckpt_gptmodel_qwen_moe,
39 |     )
40 | 
41 |     _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY = {
42 |         "LlamaForCausalLM": merge_megatron_ckpt_gptmodel,
43 |         "Qwen2ForCausalLM": merge_megatron_ckpt_gptmodel,
44 |         "MixtralForCausalLM": merge_megatron_ckpt_gptmodel_mixtral,
45 |         "Qwen2MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe,
46 |         "Qwen2_5_VLForConditionalGeneration": merge_megatron_ckpt_gptmodel_qwen2_5_vl,
47 |         "DeepseekV3ForCausalLM": merge_megatron_ckpt_gptmodel_dpskv3,
48 |         "Qwen3ForCausalLM": merge_megatron_ckpt_gptmodel,
49 |         "Qwen3MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe,
50 |     }
51 |     if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY:
52 |         return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch]
53 |     raise ValueError(
54 |         f"Model architectures {arch} saver are not supported for now. Supported architectures: "
55 |         f"{_MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY.keys()}"
56 |     )
57 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/gsm8k.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import re
16 | 
17 | 
18 | def extract_solution(solution_str, method="strict"):
19 |     assert method in ["strict", "flexible"]
20 | 
21 |     if method == "strict":
22 |         # this also tests the formatting of the model
23 |         solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str)
24 |         if len(solutions) == 0:
25 |             final_answer = None
26 |         else:
27 |             # take the last solution
28 |             final_answer = solutions[-1].replace(",", "").replace("$", "")
29 |     elif method == "flexible":
30 |         answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str)
31 |         final_answer = None
32 |         if len(answer) == 0:
33 |             # no reward is there is no answer
34 |             pass
35 |         else:
36 |             invalid_str = ["", "."]
37 |             # find the last number that is not '.'
38 |             for final_answer in reversed(answer):
39 |                 if final_answer not in invalid_str:
40 |                     break
41 |     return final_answer
42 | 
43 | 
44 | def compute_score(solution_str, ground_truth, method="strict", format_score=0.0, score=1.0):
45 |     """The scoring function for GSM8k.
46 | 
47 |     Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual
48 |     Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.
49 | 
50 |     Args:
51 |         solution_str: the solution text
52 |         ground_truth: the ground truth
53 |         method: the method to extract the solution, choices are 'strict' and 'flexible'
54 |         format_score: the score for the format
55 |         score: the score for the correct answer
56 |     """
57 |     answer = extract_solution(solution_str=solution_str, method=method)
58 |     if answer is None:
59 |         return 0
60 |     else:
61 |         if answer == ground_truth:
62 |             return score
63 |         else:
64 |             return format_score
65 | 


--------------------------------------------------------------------------------
/utils/install_vllm_sglang_mcore.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | USE_MEGATRON=${USE_MEGATRON:-1}
 4 | USE_SGLANG=${USE_SGLANG:-1}
 5 | 
 6 | export MAX_JOBS=32
 7 | 
 8 | echo "1. install inference frameworks and pytorch they need"
 9 | if [ $USE_SGLANG -eq 1 ]; then
10 |     pip install "sglang[all]==0.4.6.post1" --no-cache-dir && pip install torch-memory-saver --no-cache-dir #--find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python 
11 | fi
12 | pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata
13 | 
14 | echo "2. install basic packages"
15 | pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
16 |     "numpy<2.0.0" "pyarrow>=15.0.0" pandas \
17 |     ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \
18 |     pytest py-spy pyext pre-commit ruff
19 | 
20 | pip install "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1"
21 | 
22 | 
23 | echo "3. install FlashAttention and FlashInfer"
24 | # Install flash-attn-2.7.4.post1 (cxx11abi=False)
25 | wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
26 |     pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
27 | 
28 | # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
29 | # vllm-0.8.3 does not support flashinfer>=0.2.3
30 | # see https://github.com/vllm-project/vllm/pull/15777
31 | wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
32 |     pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
33 | 
34 | 
35 | if [ $USE_MEGATRON -eq 1 ]; then
36 |     echo "4. install TransformerEngine and Megatron"
37 |     echo "Notice that TransformerEngine installation can take very long time, please be patient"
38 |     NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.2
39 |     pip3 install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.0rc3
40 | fi
41 | 
42 | 
43 | echo "5. May need to fix opencv"
44 | pip install opencv-python
45 | pip install opencv-fixer && \
46 |     python -c "from opencv_fixer import AutoFix; AutoFix()"
47 | 
48 | 
49 | if [ $USE_MEGATRON -eq 1 ]; then
50 |     echo "6. Install cudnn python package (avoid being overridden)"
51 |     pip install nvidia-cudnn-cu12==9.8.0.87
52 | fi
53 | 
54 | echo "Successfully installed all packages"
55 | 


--------------------------------------------------------------------------------
/verl/utils/device.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # This code is inspired by the torchtune.
 4 | # https://github.com/pytorch/torchtune/blob/main/torchtune/utils/_device.py
 5 | #
 6 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 7 | # All rights reserved.
 8 | #
 9 | # This source code is licensed under the BSD-style license in https://github.com/pytorch/torchtune/blob/main/LICENSE
10 | 
11 | import logging
12 | 
13 | import torch
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
18 | def is_torch_npu_available() -> bool:
19 |     """Check the availability of NPU"""
20 |     try:
21 |         import torch_npu  # noqa: F401
22 | 
23 |         return torch.npu.is_available()
24 |     except ImportError:
25 |         return False
26 | 
27 | 
28 | is_cuda_available = torch.cuda.is_available()
29 | is_npu_available = is_torch_npu_available()
30 | 
31 | 
32 | def get_visible_devices_keyword() -> str:
33 |     """Function that gets visible devices keyword name.
34 |     Returns:
35 |         'CUDA_VISIBLE_DEVICES' or `ASCEND_RT_VISIBLE_DEVICES`
36 |     """
37 |     return "CUDA_VISIBLE_DEVICES" if is_cuda_available else "ASCEND_RT_VISIBLE_DEVICES"
38 | 
39 | 
40 | def get_device_name() -> str:
41 |     """Function that gets the torch.device based on the current machine.
42 |     This currently only supports CPU, CUDA, NPU.
43 |     Returns:
44 |         device
45 |     """
46 |     if is_cuda_available:
47 |         device = "cuda"
48 |     elif is_npu_available:
49 |         device = "npu"
50 |     else:
51 |         device = "cpu"
52 |     return device
53 | 
54 | 
55 | def get_torch_device() -> any:
56 |     """Return the corresponding torch attribute based on the device type string.
57 |     Returns:
58 |         module: The corresponding torch device namespace, or torch.cuda if not found.
59 |     """
60 |     device_name = get_device_name()
61 |     try:
62 |         return getattr(torch, device_name)
63 |     except AttributeError:
64 |         logger.warning(f"Device namespace '{device_name}' not found in torch, try to load torch.cuda.")
65 |         return torch.cuda
66 | 
67 | 
68 | def get_device_id() -> int:
69 |     """Return current device id based on the device type.
70 |     Returns:
71 |         device index
72 |     """
73 |     return get_torch_device().current_device()
74 | 
75 | 
76 | def get_nccl_backend() -> str:
77 |     """Return nccl backend type based on the device type.
78 |     Returns:
79 |         nccl backend type string.
80 |     """
81 |     if is_cuda_available:
82 |         return "nccl"
83 |     elif is_npu_available:
84 |         return "hccl"
85 |     else:
86 |         raise RuntimeError(f"No available nccl backend found on device type {get_device_name()}.")
87 | 


--------------------------------------------------------------------------------
/verl/tools/mcp_search_tool.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | import logging
17 | import os
18 | import re
19 | from typing import Tuple
20 | 
21 | from verl.tools.mcp_base_tool import MCPBaseTool
22 | 
23 | from .schemas import OpenAIFunctionToolSchema
24 | 
25 | logger = logging.getLogger(__name__)
26 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))
27 | 
28 | 
29 | class MCPSearchTool(MCPBaseTool):
30 |     def __init__(self, config: dict, tool_schema: OpenAIFunctionToolSchema):
31 |         super().__init__(config, tool_schema)
32 | 
33 |     def _parse_tool_result(self, content: list) -> Tuple[str, dict]:
34 |         res = ""
35 |         res_cnt = 0
36 |         query_list = []
37 |         metadata = {
38 |             "api_request_error": "",
39 |             "status": "unknown",
40 |             "total_results": 0,
41 |         }
42 |         try:
43 |             for part in content:
44 |                 if part.type != "text":
45 |                     continue
46 |                 text = part.text.replace("'", '"')
47 |                 query_match = re.search(r'query"\s*:\s*"([^"]+)"', text)
48 |                 query = query_match.group(1) if query_match else ""
49 |                 query_list.append(query)
50 | 
51 |                 title_matches = re.findall(r'"title"\s*:', text)
52 |                 title_count = len(title_matches)
53 | 
54 |                 results_match = re.search(r'"results"\s*:\s*(\[.*?\])', text, re.DOTALL)
55 |                 results_content = results_match.group(1) if results_match else ""
56 | 
57 |                 res += results_content
58 |                 res_cnt += title_count
59 |         except json.JSONDecodeError:
60 |             err_msg = "json parse error."
61 |             logger.error(err_msg)
62 |             metadata["api_request_error"] = err_msg
63 |             metadata["status"] = "error"
64 | 
65 |         # update metadata
66 |         metadata["status"] = "success"
67 |         metadata["queries"] = query_list
68 |         metadata["query_count"] = len(query_list)
69 |         metadata["total_results"] = res_cnt
70 |         return res, metadata
71 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import pickle
17 | from typing import Any, List, Optional
18 | 
19 | import numpy as np
20 | import torch
21 | import torch.distributed as dist
22 | 
23 | from verl.utils.device import get_device_name
24 | 
25 | 
26 | def broadcast_pyobj(
27 |     data: List[Any],
28 |     rank: int,
29 |     dist_group: Optional[torch.distributed.ProcessGroup] = None,
30 |     src: int = 0,
31 |     force_cpu_device: bool = False,
32 | ):
33 |     """from https://github.com/sgl-project/sglang/blob/844e2f227ab0cce6ef818a719170ce37b9eb1e1b/python/sglang/srt/utils.py#L905
34 | 
35 |     Broadcast inputs from src rank to all other ranks with torch.dist backend.
36 |     The `rank` here refer to the source rank on global process group (regardless
37 |     of dist_group argument).
38 |     """
39 |     device = torch.device(get_device_name() if not force_cpu_device else "cpu")
40 | 
41 |     if rank == src:
42 |         if len(data) == 0:
43 |             tensor_size = torch.tensor([0], dtype=torch.long, device=device)
44 |             dist.broadcast(tensor_size, src=src, group=dist_group)
45 |         else:
46 |             serialized_data = pickle.dumps(data)
47 |             size = len(serialized_data)
48 | 
49 |             tensor_data = torch.ByteTensor(np.frombuffer(serialized_data, dtype=np.uint8)).to(device)
50 |             tensor_size = torch.tensor([size], dtype=torch.long, device=device)
51 | 
52 |             dist.broadcast(tensor_size, src=src, group=dist_group)
53 |             dist.broadcast(tensor_data, src=src, group=dist_group)
54 |         return data
55 |     else:
56 |         tensor_size = torch.tensor([0], dtype=torch.long, device=device)
57 |         dist.broadcast(tensor_size, src=src, group=dist_group)
58 |         size = tensor_size.item()
59 | 
60 |         if size == 0:
61 |             return []
62 | 
63 |         tensor_data = torch.empty(size, dtype=torch.uint8, device=device)
64 |         dist.broadcast(tensor_data, src=src, group=dist_group)
65 | 
66 |         serialized_data = bytes(tensor_data.cpu().numpy())
67 |         data = pickle.loads(serialized_data)
68 |         return data
69 | 


--------------------------------------------------------------------------------
/scripts/train_grpo.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | WAND_PROJECT='ReSeek'
 3 | 
 4 | 
 5 | export BASE_MODEL='your/path/to/Qwen2.5-3B-Instruct'
 6 | 
 7 | export EXPERIMENT_NAME=ReSeek-nq_hotpotqa_train-qwen2.5-3b-it-em-grpo_max_turn1
 8 | set -x
 9 | 
10 | source /usr/local/Ascend/ascend-toolkit/set_env.sh
11 | source /usr/local/Ascend/nnal/atb/set_env.sh    
12 | export TORCHDYNAMO_DISABLE=1
13 | export TORCH_COMPILE_DISABLE=1
14 | 
15 | TRAIN_DATA_DIR=your/path/to/nq_search
16 | TEST_DATA_DIR=your/path/to/nq_search
17 | TIME_STAMP=$(date +%Y%m%d_%H%M%S)
18 | 
19 | PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
20 |     data.train_files=$TRAIN_DATA_DIR/train.parquet \
21 |     data.val_files=$TEST_DATA_DIR/test.parquet \
22 |     data.train_batch_size=512 \
23 |     data.val_batch_size=256 \
24 |     data.max_prompt_length=2048 \
25 |     data.max_response_length=500 \
26 |     data.max_start_length=2048 \
27 |     data.max_obs_length=500 \
28 |     data.shuffle=False \
29 |     algorithm.adv_estimator=grpo \
30 |     actor_rollout_ref.model.path=$BASE_MODEL \
31 |     actor_rollout_ref.model.enable_gradient_checkpointing=true \
32 |     actor_rollout_ref.model.use_remove_padding=True \
33 |     actor_rollout_ref.actor.optim.lr=1e-6 \
34 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
35 |     actor_rollout_ref.actor.use_kl_loss=true \
36 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
37 |     actor_rollout_ref.actor.ppo_micro_batch_size=64 \
38 |     actor_rollout_ref.actor.fsdp_config.param_offload=true \
39 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
40 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
41 |     actor_rollout_ref.rollout.name=vllm \
42 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
43 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
44 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
45 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
46 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
47 |     actor_rollout_ref.rollout.n_agent=5 \
48 |     actor_rollout_ref.rollout.temperature=1 \
49 |     algorithm.no_think_rl=false \
50 |     trainer.logger=['console','tensorboard'] \
51 |     trainer.balance_batch=false \
52 |     trainer.val_only=true \
53 |     trainer.val_before_train=true \
54 |     trainer.default_hdfs_dir=null \
55 |     trainer.n_gpus_per_node=1 \
56 |     trainer.nnodes=1 \
57 |     trainer.save_freq=100 \
58 |     trainer.test_freq=100 \
59 |     trainer.project_name=$WAND_PROJECT \
60 |     trainer.experiment_name=$EXPERIMENT_NAME \
61 |     trainer.total_epochs=1 \
62 |     trainer.default_hdfs_dir=null \
63 |     reward_model.reward_manager=naive \
64 |     max_turns=1 \
65 |     retriever.url="your/path/to/retrieve" \
66 |     retriever.topk=3 \
67 |     2>&1 | tee logs/$EXPERIMENT_NAME_$TIME_STAMP.log
68 | #    trainer.device=npu
69 | # trainer.total_training_steps=1005 \


--------------------------------------------------------------------------------
/verl/base_config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import collections
16 | from dataclasses import fields  # Import the fields function to inspect dataclass fields
17 | from typing import Any
18 | 
19 | 
20 | # BaseConfig class inherits from collections.abc.Mapping, which means it can act like a dictionary
21 | class BaseConfig(collections.abc.Mapping):
22 |     """The BaseConfig provides omegaconf DictConfig-like interface for a dataclass config.
23 | 
24 |     The BaseConfig class implements the Mapping Abstract Base Class.
25 |     This allows instances of this class to be used like dictionaries.
26 |     """
27 | 
28 |     def get(self, key: str, default: Any = None) -> Any:
29 |         """Get the value associated with the given key. If the key does not exist, return the default value.
30 | 
31 |         Args:
32 |             key (str): The attribute name to retrieve.
33 |             default (Any, optional): The value to return if the attribute does not exist. Defaults to None.
34 | 
35 |         Returns:
36 |             Any: The value of the attribute or the default value.
37 |         """
38 |         try:
39 |             return getattr(self, key)
40 |         except AttributeError:
41 |             return default
42 | 
43 |     def __getitem__(self, key: str):
44 |         """Implement the [] operator for the class. Allows accessing attributes like dictionary items.
45 | 
46 |         Args:
47 |             key (str): The attribute name to retrieve.
48 | 
49 |         Returns:
50 |             Any: The value of the attribute.
51 | 
52 |         Raises:
53 |             AttributeError: If the attribute does not exist.
54 |             TypeError: If the key type is not string
55 |         """
56 |         return getattr(self, key)
57 | 
58 |     def __iter__(self):
59 |         """Implement the iterator protocol. Allows iterating over the attribute names of the instance.
60 | 
61 |         Yields:
62 |             str: The name of each field in the dataclass.
63 |         """
64 |         for f in fields(self):
65 |             yield f.name
66 | 
67 |     def __len__(self):
68 |         """
69 |         Return the number of fields in the dataclass.
70 | 
71 |         Returns:
72 |             int: The number of fields in the dataclass.
73 |         """
74 |         return len(fields(self))
75 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/parallel_linear.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/linear.py
15 | 
16 | 
17 | from megatron.core import tensor_parallel
18 | 
19 | 
20 | class QKVParallelLinear(tensor_parallel.ColumnParallelLinear):
21 |     def __init__(
22 |         self,
23 |         input_size,
24 |         num_heads,
25 |         num_key_value_heads,
26 |         head_dim,
27 |         *,
28 |         bias=True,
29 |         gather_output=True,
30 |         skip_bias_add=False,
31 |         **kwargs,
32 |     ):
33 |         # Keep input parameters, and already restrict the head numbers
34 |         self.input_size = input_size
35 |         self.q_output_size = num_heads * head_dim
36 |         self.kv_output_size = num_key_value_heads * head_dim
37 |         self.head_dim = head_dim
38 |         self.gather_output = gather_output
39 |         self.skip_bias_add = skip_bias_add
40 | 
41 |         input_size = self.input_size
42 |         output_size = (num_heads + 2 * num_key_value_heads) * self.head_dim
43 | 
44 |         super().__init__(
45 |             input_size=input_size,
46 |             output_size=output_size,
47 |             bias=bias,
48 |             gather_output=gather_output,
49 |             skip_bias_add=skip_bias_add,
50 |             **kwargs,
51 |         )
52 | 
53 | 
54 | class MergedColumnParallelLinear(tensor_parallel.ColumnParallelLinear):
55 |     def __init__(
56 |         self,
57 |         input_size,
58 |         gate_ouput_size,
59 |         up_output_size,
60 |         *,
61 |         bias=True,
62 |         gather_output=True,
63 |         skip_bias_add=False,
64 |         **kwargs,
65 |     ):
66 |         # Keep input parameters, and already restrict the head numbers
67 |         self.input_size = input_size
68 |         self.output_size = gate_ouput_size + up_output_size
69 |         self.gather_output = gather_output
70 |         self.skip_bias_add = skip_bias_add
71 | 
72 |         super().__init__(
73 |             input_size=self.input_size,
74 |             output_size=self.output_size,
75 |             bias=bias,
76 |             gather_output=gather_output,
77 |             skip_bias_add=skip_bias_add,
78 |             **kwargs,
79 |         )
80 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/ray_backend.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import time
17 | 
18 | import ray
19 | from cupy.cuda.nccl import NcclCommunicator, get_unique_id
20 | from ray.util import list_named_actors
21 | 
22 | 
23 | @ray.remote
24 | class NCCLIDStore:
25 |     def __init__(self, nccl_id):
26 |         self._nccl_id = nccl_id
27 | 
28 |     def get(self):
29 |         return self._nccl_id
30 | 
31 | 
32 | def get_nccl_id_store_by_name(name):
33 |     all_actors = list_named_actors(all_namespaces=True)
34 |     matched_actors = [actor for actor in all_actors if actor.get("name", None) == name]
35 |     if len(matched_actors) == 1:
36 |         actor = matched_actors[0]
37 |         return ray.get_actor(**actor)
38 |     elif len(matched_actors) > 1:
39 |         logging.warning("multiple actors with same name found: %s", matched_actors)
40 |     elif len(matched_actors) == 0:
41 |         logging.info("failed to get any actor named %s", name)
42 |     return None
43 | 
44 | 
45 | def create_nccl_communicator_in_ray(
46 |     rank: int, world_size: int, group_name: str, max_retries: int = 100, interval_s: int = 5
47 | ):
48 |     if rank == 0:
49 |         nccl_id = get_unique_id()
50 |         nccl_id_store = NCCLIDStore.options(name=group_name).remote(nccl_id)
51 | 
52 |         assert ray.get(nccl_id_store.get.remote()) == nccl_id
53 |         communicator = NcclCommunicator(
54 |             ndev=world_size,
55 |             commId=nccl_id,
56 |             rank=0,
57 |         )
58 |         return communicator
59 |     else:
60 |         for i in range(max_retries):
61 |             nccl_id_store = get_nccl_id_store_by_name(group_name)
62 |             if nccl_id_store is not None:
63 |                 logging.info("nccl_id_store %s got", group_name)
64 |                 nccl_id = ray.get(nccl_id_store.get.remote())
65 |                 logging.info("nccl id for %s got: %s", group_name, nccl_id)
66 |                 communicator = NcclCommunicator(
67 |                     ndev=world_size,
68 |                     commId=nccl_id,
69 |                     rank=rank,
70 |                 )
71 |                 return communicator
72 |             logging.info("failed to get nccl_id for %d time, sleep for %d seconds", i + 1, interval_s)
73 |             time.sleep(interval_s)
74 | 


--------------------------------------------------------------------------------
/verl/tools/schemas.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import json
16 | from typing import Any, Literal
17 | 
18 | from pydantic import BaseModel
19 | 
20 | 
21 | class OpenAIFunctionPropertySchema(BaseModel):
22 |     """The schema of a parameter in OpenAI format."""
23 | 
24 |     type: str
25 |     description: str | None = None
26 |     enum: list[str] | None = None
27 | 
28 | 
29 | class OpenAIFunctionParametersSchema(BaseModel):
30 |     """The schema of parameters in OpenAI format."""
31 | 
32 |     type: str
33 |     properties: dict[str, OpenAIFunctionPropertySchema]
34 |     required: list[str]
35 | 
36 | 
37 | class OpenAIFunctionSchema(BaseModel):
38 |     """The schema of a function in OpenAI format."""
39 | 
40 |     name: str
41 |     description: str
42 |     parameters: OpenAIFunctionParametersSchema
43 |     strict: bool = False
44 | 
45 | 
46 | class OpenAIFunctionToolSchema(BaseModel):
47 |     """The schema of a tool in OpenAI format."""
48 | 
49 |     type: str
50 |     function: OpenAIFunctionSchema
51 | 
52 | 
53 | class OpenAIFunctionParsedSchema(BaseModel):
54 |     """The parsed schema of a tool in OpenAI format."""
55 | 
56 |     name: str
57 |     arguments: str  # JSON string
58 | 
59 | 
60 | class OpenAIFunctionCallSchema(BaseModel):
61 |     """The parsed schema of a tool in OpenAI format."""
62 | 
63 |     name: str
64 |     arguments: dict[str, Any]
65 | 
66 |     @staticmethod
67 |     def from_openai_function_parsed_schema(
68 |         parsed_schema: OpenAIFunctionParsedSchema,
69 |     ) -> tuple["OpenAIFunctionCallSchema", bool]:
70 |         has_decode_error = False
71 |         try:
72 |             arguments = json.loads(parsed_schema.arguments)
73 |         except json.JSONDecodeError:
74 |             arguments = {}
75 |             has_decode_error = True
76 |         # If the arguments is not a dict, it means the arguments is not a valid JSON string
77 |         if not isinstance(arguments, dict):
78 |             arguments = {}
79 |             has_decode_error = True
80 | 
81 |         return OpenAIFunctionCallSchema(name=parsed_schema.name, arguments=arguments), has_decode_error
82 | 
83 | 
84 | class OpenAIFunctionToolCall(BaseModel):
85 |     """The tool call in OpenAI format."""
86 | 
87 |     id: str
88 |     type: Literal["function"] = "function"
89 |     function: OpenAIFunctionCallSchema
90 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/pipeline_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | from megatron.core import parallel_state as mpu
18 | 
19 | from .sequence_parallel import pad_to_sequence_parallel
20 | 
21 | 
22 | def compute_transformers_input_shapes(batches, meta_info):
23 |     from flash_attn.bert_padding import unpad_input  # flash 2 is a must for Megatron
24 | 
25 |     # pre-compute input shapes for each micro-batch at each pp stage
26 |     input_shapes = []
27 |     for model_inputs in batches:
28 |         input_ids = model_inputs["input_ids"]
29 |         attention_mask = model_inputs["attention_mask"]
30 |         input_ids_rmpad = unpad_input(input_ids.unsqueeze(dim=-1), attention_mask)[0]  # (total_nnz, 1)
31 |         if meta_info["sequence_parallel"]:
32 |             input_ids_rmpad = pad_to_sequence_parallel(input_ids_rmpad)
33 |             # compute shapes for model_inputs
34 |             input_shapes.append(
35 |                 torch.Size(
36 |                     [
37 |                         input_ids_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size(),
38 |                         1,
39 |                         meta_info["hidden_size"],
40 |                     ]
41 |                 )
42 |             )
43 |         else:
44 |             # compute shapes for model_inputs
45 |             input_shapes.append(torch.Size([input_ids_rmpad.shape[0], 1, meta_info["hidden_size"]]))
46 |     return input_shapes
47 | 
48 | 
49 | def make_batch_generator(batches, vpp_size):
50 |     """
51 |     Creates a batch generator suitable for Megatron pipeline parallelism,
52 |     handling virtual pipeline parallelism (VPP).
53 | 
54 |     If VPP is used (vpp_size > 1), it duplicates the batch iterator for each
55 |     virtual pipeline stage. Otherwise, it returns a single iterator.
56 | 
57 |     Args:
58 |         batches: An iterable (e.g., list) of micro-batches.
59 |         vpp_size (int): The virtual pipeline model parallel size.
60 | 
61 |     Returns:
62 |         An iterator or a list of iterators over the micro-batches.
63 |     """
64 |     if vpp_size > 1:
65 |         # has vpp
66 |         batch_generator = [batches] * vpp_size  # number of vpp chunks
67 |         batch_generator = [iter(b) for b in batch_generator]
68 |     else:
69 |         # no vpp
70 |         batch_generator = iter(batches)
71 |     return batch_generator
72 | 


--------------------------------------------------------------------------------
/verl/trainer/config/algorithm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass, field
16 | from typing import Optional
17 | 
18 | from verl.base_config import BaseConfig
19 | 
20 | 
21 | @dataclass(frozen=True)
22 | class KLControlConfig(BaseConfig):
23 |     """Configuration for KL control."""
24 | 
25 |     type: str = "fixed"  # "fixed" or "adaptive"
26 |     kl_coef: float = 0.001  # Initial coefficient for KL penalty
27 |     horizon: int = 10000  # Horizon value for adaptive controller
28 |     target_kl: float = 0.1  # Target KL divergence for adaptive controller
29 | 
30 | 
31 | @dataclass(frozen=True)
32 | class PFPPOConfig(BaseConfig):
33 |     """Configuration for preference feedback PPO."""
34 | 
35 |     reweight_method: str = "pow"  # "pow", "max_min", or "max_random"
36 |     weight_pow: float = 2.0  # Power used for weight scaling in "pow" method
37 | 
38 | 
39 | @dataclass(frozen=True)
40 | class FilterGroupsConfig(BaseConfig):
41 |     """Configuration for filter groups (used in DAPO and Entropy)."""
42 | 
43 |     enable: bool = False  # Whether to enable filter groups
44 |     metric: Optional[str] = None  # Metric to use for filtering: "acc", "score", "seq_reward", "seq_final_reward", etc.
45 |     max_num_gen_batches: int = 0  # Non-positive values mean no upper limit
46 | 
47 | 
48 | @dataclass(frozen=True)
49 | class AlgoConfig(BaseConfig):
50 |     """Configuration for the algorithm."""
51 | 
52 |     gamma: float = 1.0  # Discount factor for future rewards
53 |     lam: float = 1.0  # Trade-off between bias and variance in the GAE estimator
54 |     adv_estimator: str = "gae"  # Advantage estimator type: "gae", "grpo", "reinforce_plus_plus", etc.
55 |     norm_adv_by_std_in_grpo: bool = True  # Whether to normalize advantages by std (specific to GRPO)
56 |     use_kl_in_reward: bool = False  # Whether to enable in-reward KL penalty
57 |     kl_penalty: str = "kl"  # How to estimate KL divergence: "kl", "abs", "mse", "low_var_kl", or "full"
58 |     kl_ctrl: KLControlConfig = field(default_factory=KLControlConfig)  # KL control configuration
59 |     use_pf_ppo: bool = False  # Whether to enable preference feedback PPO
60 |     pf_ppo: Optional[PFPPOConfig] = None  # Preference feedback PPO settings
61 | 
62 |     # Filter groups parameters (used in DAPO and Entropy)
63 |     filter_groups: Optional[FilterGroupsConfig] = None  # Filter groups configuration
64 |     no_think_rl: bool = True  # Whether to enable no-think RL
65 | 


--------------------------------------------------------------------------------
/verl/trainer/main_eval.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Offline evaluate the performance of a generated file using reward model and ground truth verifier.
16 | The input is a parquet file that contains N generated sequences and (optional) the ground truth.
17 | 
18 | """
19 | 
20 | from collections import defaultdict
21 | 
22 | import hydra
23 | import numpy as np
24 | import pandas as pd
25 | import ray
26 | from tqdm import tqdm
27 | 
28 | from verl.trainer.ppo.reward import get_custom_reward_fn
29 | from verl.utils.fs import copy_to_local
30 | 
31 | 
32 | @ray.remote
33 | def process_item(reward_fn, data_source, response_lst, reward_data):
34 |     ground_truth = reward_data["ground_truth"]
35 |     score_lst = [reward_fn(data_source, r, ground_truth) for r in response_lst]
36 |     return data_source, np.mean(score_lst)
37 | 
38 | 
39 | @hydra.main(config_path="config", config_name="evaluation", version_base=None)
40 | def main(config):
41 |     local_path = copy_to_local(config.data.path, use_shm=config.data.get("use_shm", False))
42 |     dataset = pd.read_parquet(local_path)
43 |     responses = dataset[config.data.response_key]
44 |     data_sources = dataset[config.data.data_source_key]
45 |     reward_model_data = dataset[config.data.reward_model_key]
46 | 
47 |     total = len(dataset)
48 | 
49 |     # Initialize Ray
50 |     if not ray.is_initialized():
51 |         ray.init(num_cpus=config.ray_init.num_cpus)
52 | 
53 |     # evaluate test_score based on data source
54 |     data_source_reward = defaultdict(list)
55 |     compute_score = get_custom_reward_fn(config)
56 | 
57 |     # Create remote tasks
58 |     remote_tasks = [
59 |         process_item.remote(compute_score, data_sources[i], responses[i], reward_model_data[i]) for i in range(total)
60 |     ]
61 | 
62 |     # Process results as they come in
63 |     with tqdm(total=total) as pbar:
64 |         while len(remote_tasks) > 0:
65 |             # Use ray.wait to get completed tasks
66 |             done_ids, remote_tasks = ray.wait(remote_tasks)
67 |             for result_id in done_ids:
68 |                 data_source, score = ray.get(result_id)
69 |                 data_source_reward[data_source].append(score)
70 |                 pbar.update(1)
71 | 
72 |     metric_dict = {}
73 |     for data_source, rewards in data_source_reward.items():
74 |         metric_dict[f"test_score/{data_source}"] = np.mean(rewards)
75 | 
76 |     print(metric_dict)
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/fsdp_ulysses.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT
16 | """
17 | 
18 | from torch.distributed.device_mesh import DeviceMesh
19 | 
20 | from verl import DataProto
21 | from verl.protocol import all_gather_data_proto
22 | from verl.utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group
23 | 
24 | from .base import BaseShardingManager
25 | 
26 | 
27 | class FSDPUlyssesShardingManager(BaseShardingManager):
28 |     """
29 |     Sharding manager to support data resharding when using FSDP + Ulysses
30 |     """
31 | 
32 |     def __init__(self, device_mesh: DeviceMesh):
33 |         super().__init__()
34 |         self.device_mesh = device_mesh
35 |         self.seed_offset = 12345
36 | 
37 |     def __enter__(self):
38 |         if self.device_mesh is not None:
39 |             # We have a global SP group
40 |             # so we have to change to use model-specific sp group
41 |             self.prev_sp_group = get_ulysses_sequence_parallel_group()
42 |             set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group())
43 |             # TODO: check how to set seed for each model
44 | 
45 |     def __exit__(self, exc_type, exc_value, traceback):
46 |         # restore random states
47 |         if self.device_mesh is not None:
48 |             # revert to previous sp group
49 |             set_ulysses_sequence_parallel_group(self.prev_sp_group)
50 |             # TODO: check how to set seed for each model
51 | 
52 |     def preprocess_data(self, data: DataProto) -> DataProto:
53 |         """
54 |         AllGather data from sp region
55 |         This is because the data is first sharded along the FSDP dimension as we utilize the DP_COMPUTE
56 |         In Ulysses, we need to make sure the same data is used across a SP group
57 |         """
58 |         if self.device_mesh is not None:
59 |             group = self.device_mesh["sp"].get_group()
60 | 
61 |             all_gather_data_proto(data=data, process_group=group)
62 |         return data
63 | 
64 |     def postprocess_data(self, data: DataProto) -> DataProto:
65 |         """
66 |         Split the data to follow FSDP partition
67 |         """
68 |         if self.device_mesh is not None:
69 |             sp_size = self.device_mesh["sp"].size()
70 |             sp_rank = self.device_mesh["sp"].get_local_rank()
71 |             data = data.chunk(chunks=sp_size)[sp_rank]
72 |         return data
73 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/optimizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from megatron.core.optimizer import OptimizerConfig
17 | from megatron.core.optimizer import get_megatron_optimizer as get_megatron_optimizer_native
18 | from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler
19 | 
20 | 
21 | def get_megatron_optimizer(
22 |     model,
23 |     config: OptimizerConfig,
24 |     no_weight_decay_cond=None,
25 |     scale_lr_cond=None,
26 |     lr_mult=1.0,
27 | ):
28 |     # Base optimizer.
29 |     return get_megatron_optimizer_native(
30 |         config=config,
31 |         model_chunks=model,
32 |         no_weight_decay_cond=no_weight_decay_cond,
33 |         scale_lr_cond=scale_lr_cond,
34 |         lr_mult=lr_mult,
35 |     )
36 | 
37 | 
38 | def get_megatron_optimizer_param_scheduler(
39 |     optimizer,
40 |     config,
41 | ):
42 |     """
43 |     Get the optimizer parameter scheduler for Megatron.
44 |     """
45 |     if config.get("lr_decay_steps", None) is None:
46 |         config.lr_decay_steps = config.total_training_steps
47 |     wsd_decay_steps = None
48 |     if config.get("lr_wsd_decay_steps", None) is not None:
49 |         wsd_decay_steps = config.lr_wsd_decay_steps
50 |     if config.get("lr_warmup_steps_ratio", None) is not None and (
51 |         config.get("lr_warmup_steps", None) is None or config.lr_warmup_steps <= 0
52 |     ):
53 |         config.lr_warmup_steps = int(config.lr_warmup_steps_ratio * config.lr_decay_steps)
54 | 
55 |     opt_param_scheduler = OptimizerParamScheduler(
56 |         optimizer,
57 |         init_lr=config.lr_warmup_init,
58 |         max_lr=config.lr,
59 |         min_lr=config.min_lr,
60 |         lr_warmup_steps=config.lr_warmup_steps,
61 |         lr_decay_steps=config.lr_decay_steps,
62 |         lr_decay_style=config.lr_decay_style,
63 |         start_wd=config.weight_decay,
64 |         end_wd=config.weight_decay,
65 |         wd_incr_steps=config.total_training_steps,
66 |         wd_incr_style=config.weight_decay_incr_style,
67 |         use_checkpoint_opt_param_scheduler=config.use_checkpoint_opt_param_scheduler,
68 |         override_opt_param_scheduler=(not config.use_checkpoint_opt_param_scheduler),
69 |         wsd_decay_steps=wsd_decay_steps,
70 |         lr_wsd_decay_style=config.lr_wsd_decay_style,
71 |     )
72 | 
73 |     return opt_param_scheduler
74 | 
75 | 
76 | def get_megatron_last_lr(optimizer):
77 |     """
78 |     Get the last learning rate from the optimizer parameter scheduler.
79 |     """
80 |     return optimizer.param_groups[0]["lr"]
81 | 


--------------------------------------------------------------------------------
/verl/interactions/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023-2024 SGLang Team
 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | from typing import Any, Dict, List, Optional, Tuple
17 | from uuid import uuid4
18 | 
19 | 
20 | class BaseInteraction:
21 |     def __init__(self, config: Dict[str, Any]):
22 |         self.config = config
23 |         self.name: str = config.get("name", "interaction_agent")  # More general agent default role name
24 | 
25 |     async def start_interaction(self, instance_id: Optional[str] = None, **kwargs) -> str:
26 |         """Create a tool instance.
27 | 
28 |         Args:
29 |             instance_id: The instance id of the tool.
30 | 
31 |         Returns:
32 |             The instance id of the tool.
33 |         """
34 |         if instance_id is None:
35 |             return str(uuid4())
36 |         else:
37 |             return instance_id
38 | 
39 |     async def generate_response(
40 |         self, instance_id: str, messages: List[Dict[str, Any]], **kwargs
41 |     ) -> Tuple[bool, str, float, Dict[str, Any]]:  # More clear response generation method
42 |         """
43 |         Generates a response for the current turn of interaction.
44 |         Returns a tuple containing:
45 |         - should_terminate_sequence (bool): True if the interaction sequence should end.
46 |         - response_content (str): The textual content of the response.
47 |         - current_turn_score (float): The score for this specific turn/response.
48 |         - additional_data (dict): Any extra information or metadata.
49 |         """
50 |         should_terminate_sequence: bool = False  # if True, end rollout
51 |         response_content: str = "Your current result seems acceptable."
52 |         current_turn_score: float = 0.8
53 |         additional_data: Dict[str, Any] = {}
54 |         return should_terminate_sequence, response_content, current_turn_score, additional_data
55 | 
56 |     async def calculate_score(self) -> float:  # More clear score calculation method
57 |         """
58 |         Calculates a score for the interaction,
59 |         potentially considering aspects like partial exposure & in-context task switching.
60 |         should be invoke at turn-level
61 |         """
62 |         # ...implement the logic to calculate turn-level score...
63 |         score = 0.0
64 |         return score
65 | 
66 |     async def finalize_interaction(self) -> None:  # More clear interaction end and resource release method
67 |         """
68 |         Finalizes the interaction session and releases any associated state or resources.
69 |         Simulates: release state
70 |         """
71 |         # ...implement the logic to release state...
72 |         pass
73 | 


--------------------------------------------------------------------------------