├── verl ├── verl │ ├── py.typed │ ├── version │ │ └── version │ ├── trainer │ │ ├── runtime_env.yaml │ │ ├── config │ │ │ ├── evaluation.yaml │ │ │ ├── __init__.py │ │ │ ├── ref │ │ │ │ ├── megatron_ref.yaml │ │ │ │ ├── dp_ref.yaml │ │ │ │ └── ref.yaml │ │ │ ├── npu_profile │ │ │ │ └── npu_profile.yaml │ │ │ ├── reward_model │ │ │ │ ├── dp_reward_model.yaml │ │ │ │ └── megatron_reward_model.yaml │ │ │ ├── generation.yaml │ │ │ ├── actor │ │ │ │ └── dp_actor.yaml │ │ │ └── sft_trainer.yaml │ │ ├── __init__.py │ │ ├── ppo │ │ │ └── __init__.py │ │ ├── constants_ppo.py │ │ └── main_eval.py │ ├── workers │ │ ├── rollout │ │ │ ├── sglang_rollout │ │ │ │ ├── test.py │ │ │ │ └── __init__.py │ │ │ ├── naive │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── vllm_rollout │ │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── engine │ │ │ ├── megatron │ │ │ │ └── __init__.py │ │ │ ├── fsdp │ │ │ │ ├── __init__.py │ │ │ │ └── utils.py │ │ │ └── __init__.py │ │ ├── sharding_manager │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── fsdp_ulysses.py │ │ ├── roles │ │ │ ├── __init__.py │ │ │ └── actor.py │ │ ├── reward_model │ │ │ ├── __init__.py │ │ │ ├── megatron │ │ │ │ └── __init__.py │ │ │ └── base.py │ │ ├── actor │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── critic │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── config │ │ │ └── __init__.py │ │ └── reward_manager │ │ │ ├── __init__.py │ │ │ ├── abstract.py │ │ │ └── registry.py │ ├── models │ │ ├── __init__.py │ │ ├── llama │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── checkpoint_utils │ │ │ │ └── __init__.py │ │ │ │ ├── __init__.py │ │ │ │ └── layers │ │ │ │ ├── __init__.py │ │ │ │ └── parallel_rmsnorm.py │ │ ├── qwen2 │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── checkpoint_utils │ │ │ │ └── __init__.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_rmsnorm.py │ │ │ │ └── parallel_linear.py │ │ │ │ └── __init__.py │ │ ├── transformers │ │ │ ├── __init__.py │ │ │ └── 
npu_patch.py │ │ ├── mcore │ │ │ ├── qwen2_5_vl │ │ │ │ └── __init__.py │ │ │ ├── mbridge.py │ │ │ └── __init__.py │ │ ├── README.md │ │ ├── registry.py │ │ └── weight_loader_registry.py │ ├── experimental │ │ ├── __init__.py │ │ ├── dataset │ │ │ ├── __init__.py │ │ │ └── sampler.py │ │ ├── dynamic_dataset │ │ │ └── __init__.py │ │ └── agent_loop │ │ │ ├── __init__.py │ │ │ └── single_turn_agent_loop.py │ ├── model_merger │ │ ├── __init__.py │ │ └── __main__.py │ ├── third_party │ │ ├── __init__.py │ │ ├── sglang │ │ │ └── __init__.py │ │ └── vllm │ │ │ └── __init__.py │ ├── utils │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── memory.py │ │ │ ├── sequence_parallel.py │ │ │ ├── dist_checkpointing.py │ │ │ └── pipeline_parallel.py │ │ ├── checkpoint │ │ │ └── __init__.py │ │ ├── experimental │ │ │ └── __init__.py │ │ ├── rendezvous │ │ │ ├── __init__.py │ │ │ └── ray_backend.py │ │ ├── metric │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── debug │ │ │ ├── __init__.py │ │ │ └── performance.py │ │ ├── reward_score │ │ │ ├── prime_code │ │ │ │ ├── README.md │ │ │ │ └── utils.py │ │ │ ├── math_batch.py │ │ │ ├── geo3k.py │ │ │ ├── math_verify.py │ │ │ └── gsm8k.py │ │ ├── dataset │ │ │ ├── __init__.py │ │ │ └── README.md │ │ ├── __init__.py │ │ ├── logger │ │ │ └── __init__.py │ │ ├── vllm │ │ │ └── __init__.py │ │ ├── logging_utils.py │ │ ├── profiler │ │ │ ├── empty_annotations.py │ │ │ └── __init__.py │ │ ├── transformers_compat.py │ │ ├── kernel │ │ │ └── __init__.py │ │ ├── distributed.py │ │ ├── net_utils.py │ │ ├── torch_dtypes.py │ │ ├── device.py │ │ └── config.py │ ├── tools │ │ ├── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── mcp_clients │ │ │ └── utils.py │ ├── interactions │ │ ├── utils │ │ │ └── __init__.py │ │ └── __init__.py │ ├── single_controller │ │ ├── base │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── ray │ │ │ └── __init__.py │ └── __init__.py ├── Notice.txt ├── .gemini │ └── config.yaml ├── .github │ ├── dependabot.yml │ ├── 
workflows │ │ ├── secrets_scan.yml │ │ ├── type-coverage-check.yml │ │ ├── pre-commit-full.yml │ │ ├── pre-commit.yml │ │ ├── .deprecate │ │ │ └── e2e_prime.yml │ │ ├── README.md │ │ ├── e2e_sppo.yml │ │ ├── check-pr-title.yml │ │ ├── e2e_spin.yml │ │ └── scorecard.yml │ ├── CODEOWNERS │ └── PULL_REQUEST_TEMPLATE.md ├── requirements-npu.txt ├── .readthedocs.yaml ├── requirements_sglang.txt ├── requirements.txt ├── scripts │ ├── __init__.py │ ├── print_cfg.py │ ├── generate_trainer_config.sh │ └── install_vllm_sglang_mcore.sh ├── .pre-commit-config.yaml ├── .gitignore └── examples │ └── sglang_multiturn │ └── tool_config.yaml ├── .gitignore ├── images └── introduction.png ├── inference └── scripts │ ├── sglang_glm_45_air.sh │ ├── sglang_qwen_coder_30B.sh │ ├── sglang_sr_scientist_30B.sh │ ├── sandbox.sh │ ├── sglang_qwen_coder_480B.sh │ ├── sglang_oss_120b.sh │ ├── sglang_oss_20b.sh │ └── inference.sh └── utils └── data_preprocessing.py /verl/verl/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | data/ 3 | 4 | -------------------------------------------------------------------------------- /verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.5.0.dev 2 | -------------------------------------------------------------------------------- /verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. 
and/or its affiliates -------------------------------------------------------------------------------- /images/introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/SR-Scientist/HEAD/images/introduction.png -------------------------------------------------------------------------------- /verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | CUDA_DEVICE_MAX_CONNECTIONS: "1" 6 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/sglang_rollout/test.py: -------------------------------------------------------------------------------- 1 | from sglang.srt.function_call.function_call_parser import FunctionCallParser 2 | items = FunctionCallParser.ToolCallParserEnum.items() 3 | print(items) -------------------------------------------------------------------------------- /verl/.gemini/config.yaml: -------------------------------------------------------------------------------- 1 | have_fun: false 2 | code_review: 3 | disable: false 4 | comment_severity_threshold: HIGH 5 | max_review_comments: -1 6 | pull_request_opened: 7 | help: false 8 | summary: false 9 | code_review: true 10 | ignore_patterns: [] 11 | -------------------------------------------------------------------------------- /inference/scripts/sglang_glm_45_air.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/GLM-4.5-FP8" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser glm45 \ 6 | --reasoning-parser glm45 \ 7 | --mem-fraction-static 0.85 8 | 9 | -------------------------------------------------------------------------------- 
/inference/scripts/sglang_qwen_coder_30B.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser qwen3_coder \ 6 | --mem-fraction-static 0.85 7 | 8 | 9 | -------------------------------------------------------------------------------- /inference/scripts/sglang_sr_scientist_30B.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser qwen3_coder \ 6 | --mem-fraction-static 0.85 7 | 8 | 9 | -------------------------------------------------------------------------------- /verl/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | ## Enabled the dependabot to check the dependencies of the project 2 | ## Dependabot will open pull requests to update dependencies automatically 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: pip 7 | directory: "/" 8 | schedule: 9 | interval: weekly -------------------------------------------------------------------------------- /inference/scripts/sandbox.sh: -------------------------------------------------------------------------------- 1 | conda activate sandbox-runtime 2 | make run-online PORT=9010 & 3 | make run-online PORT=9020 & 4 | make run-online PORT=9030 & 5 | make run-online PORT=9040 & 6 | make run-online PORT=9050 & 7 | make run-online PORT=9060 & 8 | make run-online PORT=9070 & 9 | make run-online PORT=9080 & -------------------------------------------------------------------------------- /inference/scripts/sglang_qwen_coder_480B.sh: -------------------------------------------------------------------------------- 1 | 
MODEL_PATH="../models/Qwen3-Coder-480B-A35B-Instruct-FP8" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 4 \ 5 | --dp 2 \ 6 | --tool-call-parser qwen3_coder \ 7 | --mem-fraction-static 0.85 8 | 9 | 10 | -------------------------------------------------------------------------------- /verl/requirements-npu.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | hydra-core 7 | numpy<2.0.0 8 | pandas 9 | peft 10 | pyarrow>=15.0.0 11 | pybind11 12 | pylatexenc 13 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 14 | transformers==4.52.4 15 | ray==2.46.0 16 | wandb 17 | mathruler 18 | torchdata 19 | einops 20 | qwen_vl_utils 21 | torchvision==0.20.1 22 | -------------------------------------------------------------------------------- /verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 
20 | -------------------------------------------------------------------------------- /verl/requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy<2.0.0 9 | pandas 10 | peft 11 | pyarrow>=19.0.0 12 | pybind11 13 | pylatexenc 14 | ray[default]>=2.10 15 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 16 | torchdata 17 | torchvision 18 | transformers 19 | wandb 20 | sglang[all]==0.4.9.post6 21 | huggingface_hub 22 | -------------------------------------------------------------------------------- /inference/scripts/sglang_oss_120b.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/gpt-oss-120b" 2 | conda activate srscientist 3 | # For machines without internet access, set TIKTOKEN_RS_CACHE_DIR=CACHE_FILE by following the instructions in this issue: https://huggingface.co/openai/gpt-oss-120b/discussions/39. 4 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 5 | --tp 8 \ 6 | --tool-call-parser gpt-oss \ 7 | --reasoning-parser gpt-oss \ 8 | --mem-fraction-static 0.85 9 | 10 | -------------------------------------------------------------------------------- /inference/scripts/sglang_oss_20b.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/gpt-oss-20b" 2 | conda activate srscientist 3 | # For machines without internet access, set TIKTOKEN_RS_CACHE_DIR=CACHE_FILE by following the instructions in this issue: https://huggingface.co/openai/gpt-oss-120b/discussions/39.
4 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 5 | --tp 8 \ 6 | --tool-call-parser gpt-oss \ 7 | --reasoning-parser gpt-oss \ 8 | --mem-fraction-static 0.85 9 | 10 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_kwargs: 13 | ray_init: 14 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 15 | timeline_json_file: null 16 | -------------------------------------------------------------------------------- /verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy<2.0.0 10 | pandas 11 | peft 12 | pyarrow>=19.0.0 13 | pybind11 14 | pylatexenc 15 | pre-commit 16 | ray[default] 17 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 18 | torchdata 19 | transformers 20 | # vllm==0.8.4 21 | wandb 22 | packaging>=20.0 23 | uvicorn 24 | fastapi 25 | latex2sympy2_extended 26 | math_verify 27 | tensorboard 28 | torch_memory_saver -------------------------------------------------------------------------------- /verl/.github/workflows/secrets_scan.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - v0.* 6 | pull_request: 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 17 | with: 18 | fetch-depth: 0 19 | - name: Secret Scanning 20 | uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14 21 | with: 22 | extra_args: --results=verified,unknown 23 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | -------------------------------------------------------------------------------- /verl/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/model_merger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/dynamic_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/workers/roles/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .critic import CriticWorker 16 | 17 | __all__ = ["CriticWorker"] 18 | -------------------------------------------------------------------------------- /verl/verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import reduce_metrics 16 | 17 | __all__ = ["reduce_metrics"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/fsdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .engine_impl import FSDPEngine 15 | 16 | __all__ = ["FSDPEngine"] 17 | -------------------------------------------------------------------------------- /verl/verl/interactions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | 17 | __all__ = ["NaiveRollout"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | 17 | __all__ = ["BasePPORewardModel"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | 17 | __all__ = ["MegatronRewardModel"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # For new features please develop in verl/utils/profiler/ 17 | from ..profiler import * # noqa 18 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .base import BaseEngine, EngineRegistry 15 | from .fsdp import FSDPEngine 16 | 17 | __all__ = ["BaseEngine", "EngineRegistry", "FSDPEngine"] 18 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- 1 | ## LiveCodeBench 2 | 3 | ### Introduction 4 | [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench) provides holistic and contamination-free evaluation of coding capabilities of LLMs. Particularly, LiveCodeBench continuously collects new problems over time from contests across three competition platforms -- LeetCode, AtCoder, and CodeForces. 5 | 6 | ### How to reproduce 7 | Our evaluation is grounded on the version found in LiveCodeBench. 8 | > **Installation** 9 | ```bash 10 | # Make sure the CUDA version > 12.0. 11 | pip install -r requirements.txt 12 | pip install flash-attn --no-build-isolation 13 | ``` 14 | 15 | ### Acknowleage 16 | Thank you to the [LiveCodeBench](https://livecodebench.github.io/leaderboard.html) team for their contributions to the open-source community. 
-------------------------------------------------------------------------------- /verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .algorithm import * # noqa 16 | from .config import * # noqa 17 | from . 
import config, algorithm 18 | 19 | __all__ = config.__all__ + algorithm.__all__ 20 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .hf_rollout import HFRollout 17 | from .naive import NaiveRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | 19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"] 20 | -------------------------------------------------------------------------------- /verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"] 19 | -------------------------------------------------------------------------------- /verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers. 
4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # This file is deprecated, for new features please develop in profiler/performance.py 17 | from verl.utils.profiler.performance import simple_timer, reduce_timing # noqa 18 | -------------------------------------------------------------------------------- /verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config, tokenizer 16 | from .config import omega_conf_to_dataclass 17 | from .tokenizer import hf_processor, hf_tokenizer 18 | 19 | __all__ = tokenizer.__all__ + config.__all__ + ["hf_processor", "hf_tokenizer", "omega_conf_to_dataclass"] 20 | -------------------------------------------------------------------------------- /verl/verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .agent_loop import AgentLoopBase, AgentLoopManager 16 | from .single_turn_agent_loop import SingleTurnAgentLoop 17 | from .tool_agent_loop import ToolAgentLoop 18 | 19 | _ = [SingleTurnAgentLoop, ToolAgentLoop] 20 | 21 | __all__ = ["AgentLoopBase", "AgentLoopManager"] 22 | -------------------------------------------------------------------------------- /verl/.github/workflows/type-coverage-check.yml: -------------------------------------------------------------------------------- 1 | name: Type Annotation and Docstring Coverage 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - '**/*.py' 7 | - '.github/workflows/type-coverage-check.yml' 8 | 9 | jobs: 10 | type-coverage-check: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 # 🚨 Important: fetch full history so `origin/main` is available 16 | - name: Set up Python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: '3.10' 20 | 21 | - name: Install dependencies 22 | run: | 23 | pip install gitpython 24 | pip install -e .[sglang] 25 | - name: Run type annotation coverage check 26 | run: | 27 | python3 tests/special_sanity/type_coverage_check.py 28 | - name: Run docstring coverage check 29 | run: | 30 | python3 tests/special_sanity/check_api_docs.py verl 31 | -------------------------------------------------------------------------------- /verl/verl/workers/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .critic import * # noqa 16 | from .actor import * # noqa 17 | from .engine import * # noqa 18 | from .optimizer import * # noqa 19 | from .rollout import * # noqa 20 | from . import actor, critic, engine, optimizer, rollout 21 | 22 | __all__ = actor.__all__ + critic.__all__ + engine.__all__ + optimizer.__all__ + rollout.__all__ 23 | -------------------------------------------------------------------------------- /verl/.github/workflows/pre-commit-full.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit-full 2 | 3 | # Run weekly on Sunday at 00:00 UTC 4 | on: 5 | schedule: 6 | - cron: "0 0 * * 0" 7 | # Allow manual triggering 8 | workflow_dispatch: 9 | 10 | # Declare permissions just read content. 
11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | pre-commit-full: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | python-version: ["3.12"] 20 | steps: 21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Set ruff --output-format=github 27 | run: | 28 | sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml 29 | git add .pre-commit-config.yaml 30 | - uses: pre-commit/action@v3.0.1 31 | -------------------------------------------------------------------------------- /verl/verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # Copyright (c) 2024 Alibaba PAI Team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | from .model import Qwen2_5VLModel 19 | from .vision_config import get_vision_model_config, get_vision_projection_config 20 | 21 | __all__ = ["Qwen2_5VLModel", "get_vision_model_config", "get_vision_projection_config"] 22 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from abc import ABC, abstractmethod 16 | 17 | from verl import DataProto 18 | 19 | __all__ = ["BaseRollout"] 20 | 21 | 22 | class BaseRollout(ABC): 23 | """Base class for rollout.""" 24 | 25 | @abstractmethod 26 | def generate_sequences(self, prompts: DataProto) -> DataProto: 27 | """Generate sequences""" 28 | pass 29 | -------------------------------------------------------------------------------- /verl/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | /docs @eric-haibin-lin @zhaochenyang20 @hongpeng-guo 2 | /docs/amd_tutorial @yushengsu-thu 3 | /docs/slang_multiturn @zhaochenyang20 @SwordFaith 4 | 5 | /recipe/dapo @tongyx361 @PeterSH6 6 | /recipe/spin @zhaochenyang20 7 | /recipe/sppo @zhaochenyang20 8 | 9 | /third_party/sglang @zhaochenyang20 @SwordFaith 10 | /third_party/vllm @PeterSH6 @wuxibin89 11 | /verl/single_controller @zw0610 @wuxibin89 @hongpeng-guo 12 | /verl/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6 13 | /verl/workers/engine @eric-haibin-lin @vermouth1992 @ZihengJiang 14 | /verl/workers/roles @eric-haibin-lin @vermouth1992 @ZihengJiang 15 | /verl/workers/engine/fsdp @eric-haibin-lin @vermouth1992 @ZihengJiang 16 | /verl/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq 17 | /verl/workers/rollout/sglang_rollout @zhaochenyang20 @SwordFaith @chenhaiq 18 | 19 | /tests/single_controller @zw0610 @wuxibin89 20 | /tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6 21 | /tests/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq 22 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- 1 | # megatron ref config, inheriting from trainer/config/ref/ref.yaml 2 | defaults: 3 | - ref 4 | # load the reference default config, then apply the fields in the current yaml 5 | - _self_ 6 | 7 | strategy: 
megatron 8 | 9 | megatron: 10 | _target_: verl.workers.config.MegatronEngineConfig 11 | param_offload: False 12 | tensor_model_parallel_size: 1 13 | expert_model_parallel_size: 1 14 | expert_tensor_parallel_size: None 15 | pipeline_model_parallel_size: 1 16 | virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests 17 | context_parallel_size: 1 18 | sequence_parallel: True 19 | use_distributed_optimizer: False 20 | use_dist_checkpointing: False 21 | dist_checkpointing_path: null 22 | seed: ${oc.select:actor_rollout_ref.actor.megatron.seed,42} 23 | override_transformer_config: ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}} 24 | use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False} 25 | 26 | load_weight: True -------------------------------------------------------------------------------- /verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from . 
import base 17 | from .base import * 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | # Note(haibin.lin): single_controller.__version__ is deprecated 22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f: 23 | __version__ = f.read().strip() 24 | 25 | 26 | __all__ = base.__all__ 27 | -------------------------------------------------------------------------------- /verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import ( 16 | RayClassWithInitArgs, 17 | RayResourcePool, 18 | RayWorkerGroup, 19 | create_colocated_worker_cls, 20 | create_colocated_worker_cls_fused, 21 | ) 22 | 23 | __all__ = [ 24 | "RayClassWithInitArgs", 25 | "RayResourcePool", 26 | "RayWorkerGroup", 27 | "create_colocated_worker_cls", 28 | "create_colocated_worker_cls_fused", 29 | ] 30 | -------------------------------------------------------------------------------- /verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .aggregate_logger import ( 17 | DecoratorLoggerBase, 18 | LocalLogger, 19 | log_with_rank, 20 | print_rank_0, 21 | print_with_rank, 22 | print_with_rank_and_timer, 23 | ) 24 | 25 | __all__ = [ 26 | "LocalLogger", 27 | "DecoratorLoggerBase", 28 | "print_rank_0", 29 | "print_with_rank", 30 | "print_with_rank_and_timer", 31 | "log_with_rank", 32 | ] 33 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/npu_profile/npu_profile.yaml: -------------------------------------------------------------------------------- 1 | # Options for the npu profiler 2 | options: 3 | 4 | # Storage path of collected data. 5 | save_path: ./profiler_data 6 | 7 | # The roles that will be profiled. Only takes effect in discrete mode. 8 | # optional values: all, rollout_generate, actor_compute_log_prob, actor_update and ref_compute_log_prob. 9 | # "all" means all roles will be profiled. 10 | roles: ["all"] 11 | 12 | # Collection level, optional values: level_none, level0, level1, level2. 13 | level: level1 14 | 15 | # Whether to enable memory analysis. 16 | with_memory: False 17 | 18 | # Whether to record tensor shape. 19 | record_shapes: False 20 | 21 | # Whether to record Device-side performance data. 22 | with_npu: True 23 | 24 | # Whether to record Host-side performance data. 
25 | with_cpu: True 26 | 27 | # Whether to record Python call stack information. 28 | with_module: False 29 | 30 | # Whether to record operator call stack information. 31 | with_stack: False 32 | 33 | # Whether to automatically parse the data. 34 | analysis: True -------------------------------------------------------------------------------- /verl/verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | try: 17 | from mbridge import AutoBridge 18 | from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model 19 | except ImportError: 20 | print("mbridge package not found. Please install mbridge with `pip install verl[mcore]` or `pip install mbridge`") 21 | raise 22 | 23 | __all__ = ["AutoBridge", "make_value_model", "freeze_moe_router"] 24 | -------------------------------------------------------------------------------- /verl/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .registry import ( 17 | get_mcore_forward_fn, 18 | get_mcore_forward_fused_fn, 19 | get_mcore_weight_converter, 20 | hf_to_mcore_config, 21 | init_mcore_model, 22 | ) 23 | 24 | __all__ = [ 25 | "hf_to_mcore_config", 26 | "init_mcore_model", 27 | "get_mcore_forward_fn", 28 | "get_mcore_weight_converter", 29 | "get_mcore_forward_fused_fn", 30 | ] 31 | -------------------------------------------------------------------------------- /verl/verl/utils/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .utils import TensorLoRARequest, VLLMHijack, is_version_ge 16 | 17 | # The contents of vllm/patch.py should not be imported here, because the contents of 18 | # patch.py should be imported after the vllm LLM instance is created. Therefore, 19 | # wait until you actually start using it before importing the contents of 20 | # patch.py separately. 21 | 22 | __all__ = [ 23 | "TensorLoRARequest", 24 | "VLLMHijack", 25 | "is_version_ge", 26 | ] 27 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | 20 | __all__ = [ 21 | "ParallelQwen2Attention", 22 | "ParallelQwen2DecoderLayer", 23 | "ParallelQwen2DecoderLayerRmPad", 24 | "ParallelQwen2MLP", 25 | "ParallelQwen2RMSNorm", 26 | ] 27 | -------------------------------------------------------------------------------- /verl/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: "v0.12.2" 4 | hooks: 5 | - id: ruff 6 | args: ["--fix", "--show-fixes", "--output-format=full"] 7 | exclude: ^.*\.(ipynb)$ 8 | - id: ruff-format 9 | 10 | - repo: https://github.com/pre-commit/mirrors-mypy 11 | rev: 'v1.17.0' 12 | hooks: 13 | - id: mypy 14 | 15 | - repo: local 16 | hooks: 17 | - id: autogen-trainer-cfg 18 | name: Generate and verify verl/trainer/config/_generated_*.yaml 19 | entry: scripts/generate_trainer_config.sh 20 | language: script 21 | pass_filenames: false 22 | 23 | - repo: local 24 | hooks: 25 | - id: check-docstrings 26 | name: Check doc string coverage 27 | entry: python3 tests/special_sanity/check_docstrings.py 28 | language: python 29 | pass_filenames: false 30 | 31 | - repo: local 32 | hooks: 33 | - id: check-license 34 | name: Check license 35 | entry: python3 tests/special_sanity/check_license.py --directory . 
from .math import compute_score


def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
    """Score a batch of solutions by applying ``compute_score`` element-wise.

    Demonstrates the expected signature of a batched reward function; a real
    implementation would parallelize the per-sample scoring for speed.

    Args:
        data_sources: Per-sample data-source tags (unused by this demo).
        solution_strs: Model outputs to score.
        ground_truths: Reference answers, aligned with ``solution_strs``.
        extra_infos: Per-sample extra metadata (unused by this demo).

    Returns:
        list: One score per (solution, ground truth) pair.
    """
    paired = zip(solution_strs, ground_truths, strict=True)
    return [compute_score(answer, reference) for answer, reference in paired]
import logging
import os

import torch

# Global format applied by set_basic_config at verl import time.
_LOG_FORMAT = "%(levelname)s:%(asctime)s:%(message)s"


def set_basic_config(level):
    """Install the global logging format and level.

    Called once when ``verl`` is imported.

    Args:
        level: Logging level (e.g. ``logging.INFO``) forwarded to
            ``logging.basicConfig``.
    """
    logging.basicConfig(format=_LOG_FORMAT, level=level)


def log_to_file(string):
    """Echo *string* to stdout and, when a ``logs/`` directory exists,
    append it to a per-rank log file.

    Args:
        string: Message to record.
    """
    print(string)
    if not os.path.isdir("logs"):
        return
    # One file per distributed rank to avoid interleaved writes.
    rank = torch.distributed.get_rank()
    with open(f"logs/log_{rank}", "a+") as f:
        f.write(string + "\n")
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __init__(self): 23 | self.timing = {} 24 | 25 | def __enter__(self): 26 | pass 27 | 28 | def __exit__(self, exc_type, exc_value, traceback): 29 | pass 30 | 31 | def preprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 | 34 | def postprocess_data(self, data: DataProto) -> DataProto: 35 | return data 36 | -------------------------------------------------------------------------------- /verl/.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # c.f. https://github.com/pre-commit/action?tab=readme-ov-file#using-this-action 2 | name: pre-commit 3 | 4 | # No need to avoid / cancel lightweight pre-commit jobs 5 | on: 6 | pull_request: 7 | push: 8 | branches: 9 | - main 10 | - v0.* 11 | 12 | # Declare permissions just read content. 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | pre-commit: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ["3.12"] 22 | steps: 23 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install the current repository 29 | run: | 30 | pip install -e . 31 | - name: Set ruff --output-format=github 32 | run: | 33 | sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml 34 | git add .pre-commit-config.yaml 35 | # Check "--all-files" by default 36 | - uses: pre-commit/action@v3.0.1 37 | -------------------------------------------------------------------------------- /verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
"""
Base class for a critic
"""

from abc import ABC, abstractmethod

import torch

from verl import DataProto

__all__ = ["BasePPOCritic"]


class BasePPOCritic(ABC):
    """Abstract interface for a PPO critic (value-function estimator)."""

    def __init__(self, config):
        super().__init__()
        # Critic configuration, kept for use by subclasses.
        self.config = config

    @abstractmethod
    def compute_values(self, data: DataProto) -> torch.Tensor:
        """Compute value estimates for a batch of trajectories."""
        ...

    @abstractmethod
    def update_critic(self, data: DataProto):
        """Run one critic update step on a batch of trajectories."""
        ...
from typing import Callable, Optional


def mark_start_range(
    message: Optional[str] = None,
    color: Optional[str] = None,
    domain: Optional[str] = None,
    category: Optional[str] = None,
) -> None:
    """No-op stand-in for the profiler ``mark_start_range`` hook."""


def mark_end_range(range_id: str) -> None:
    """No-op stand-in for the profiler ``mark_end_range`` hook."""


def mark_annotate(
    message: Optional[str] = None,
    color: Optional[str] = None,
    domain: Optional[str] = None,
    category: Optional[str] = None,
) -> Callable:
    """Return a decorator that leaves the wrapped function untouched.

    Used when profiling is disabled so annotated code runs unchanged.
    """

    def passthrough(func):
        # Identity decorator: hand the function back as-is.
        return func

    return passthrough
14 | 15 | from .registry import get_reward_manager_cls, register # noqa: I001 16 | from .batch import BatchRewardManager 17 | from .dapo import DAPORewardManager 18 | from .naive import NaiveRewardManager 19 | from .prime import PrimeRewardManager 20 | from .sr_scientist import SRScientistRewardManager 21 | 22 | 23 | # Note(haibin.lin): no need to include all reward managers here in case of complicated dependencies 24 | __all__ = [ 25 | "BatchRewardManager", 26 | "DAPORewardManager", 27 | "NaiveRewardManager", 28 | "PrimeRewardManager", 29 | "register", 30 | "get_reward_manager_cls", 31 | "SRScientistRewardManager", 32 | ] 33 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_llama_megatron import ( 16 | ParallelLlamaForCausalLM, 17 | # rmpad with megatron 18 | ParallelLlamaForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelLlamaForCausalLMRmPadPP, 21 | ParallelLlamaForValueRmPad, 22 | ParallelLlamaForValueRmPadPP, 23 | # original model with megatron 24 | ParallelLlamaModel, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelLlamaForCausalLM", 29 | "ParallelLlamaForCausalLMRmPad", 30 | "ParallelLlamaForCausalLMRmPadPP", 31 | "ParallelLlamaForValueRmPad", 32 | "ParallelLlamaForValueRmPadPP", 33 | "ParallelLlamaModel", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_qwen2_megatron import ( 16 | ParallelQwen2ForCausalLM, 17 | # rmpad with megatron 18 | ParallelQwen2ForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelQwen2ForCausalLMRmPadPP, 21 | ParallelQwen2ForValueRmPad, 22 | ParallelQwen2ForValueRmPadPP, 23 | # original model with megatron 24 | ParallelQwen2Model, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelQwen2ForCausalLM", 29 | "ParallelQwen2ForCausalLMRmPad", 30 | "ParallelQwen2ForCausalLMRmPadPP", 31 | "ParallelQwen2ForValueRmPad", 32 | "ParallelQwen2ForValueRmPadPP", 33 | "ParallelQwen2Model", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_linear import ( 18 | LinearForLastLayer, 19 | MergedColumnParallelLinear, 20 | QKVParallelLinear, 21 | ) 22 | from .parallel_mlp import ParallelLlamaMLP 23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 24 | 25 | __all__ = [ 26 | "LinearForLastLayer", 27 | "MergedColumnParallelLinear", 28 | "QKVParallelLinear", 29 | "ParallelLlamaAttention", 30 | "ParallelLlamaDecoderLayer", 31 | "ParallelLlamaDecoderLayerRmPad", 32 | "ParallelLlamaMLP", 33 | "ParallelLlamaRMSNorm", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/experimental/dataset/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Amazon.com Inc and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from abc import abstractmethod
from collections.abc import Sized

from omegaconf import DictConfig
from torch.utils.data import Sampler

from verl import DataProto


class AbstractSampler(Sampler[int]):
    """Abstract interface for custom samplers.

    Concrete samplers are constructed from the dataset and the data
    section of the trainer configuration.
    """

    @abstractmethod
    def __init__(
        self,
        data_source: Sized,
        data_config: DictConfig,
    ):
        ...


class AbstractCurriculumSampler(AbstractSampler):
    """Experimental interface for curriculum learning samplers.

    Implementations adjust their sampling strategy after observing each
    training batch.
    """

    @abstractmethod
    def update(self, batch: DataProto) -> None:
        ...
18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | -------------------------------------------------------------------------------- /verl/scripts/print_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | try: 15 | import hydra 16 | except ImportError as e: 17 | raise ImportError("Please install hydra-core via 'pip install hydra-core' and retry.") from e 18 | 19 | 20 | @hydra.main(config_path="../verl/trainer/config", config_name="ppo_trainer", version_base=None) 21 | def main(config): 22 | """Main entry point for PPO training with Hydra configuration management. 23 | 24 | Args: 25 | config_dict: Hydra configuration dictionary containing training parameters. 
26 | """ 27 | print(config) 28 | from verl.utils.config import omega_conf_to_dataclass 29 | 30 | profiler_config = omega_conf_to_dataclass(config.critic.profiler) 31 | print(profiler_config) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /verl/verl/utils/transformers_compat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Compatibility utilities for different versions of transformers library. 17 | """ 18 | 19 | # Handle version compatibility for flash_attn_supports_top_left_mask 20 | # This function was added in newer versions of transformers 21 | try: 22 | from transformers.modeling_flash_attention_utils import flash_attn_supports_top_left_mask 23 | except ImportError: 24 | # For older versions of transformers that don't have this function 25 | # Default to False as a safe fallback for older versions 26 | def flash_attn_supports_top_left_mask(): 27 | """Fallback implementation for older transformers versions. 28 | Returns False to disable features that require this function. 
29 | """ 30 | return False 31 | -------------------------------------------------------------------------------- /verl/verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 19 | # 20 | # Licensed under the Apache License, Version 2.0 (the "License"); 21 | # you may not use this file except in compliance with the License. 22 | # You may obtain a copy of the License at 23 | # 24 | # http://www.apache.org/licenses/LICENSE-2.0 25 | # 26 | # Unless required by applicable law or agreed to in writing, software 27 | # distributed under the License is distributed on an "AS IS" BASIS, 28 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 | # See the License for the specific language governing permissions and 30 | # limitations under the License. 31 | 32 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_manager/abstract.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2025 SGLang Team 2 | # Copyright Amazon.com, Inc. or its affiliates. 3 | # Copyright 2025 ModelBest Inc. 
and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from abc import ABC, abstractmethod 18 | from typing import Any, Callable 19 | 20 | import torch 21 | 22 | from verl.protocol import DataProto 23 | 24 | RawRewardFn = Callable[..., Any] 25 | 26 | 27 | class AbstractRewardManager(ABC): 28 | @abstractmethod 29 | def __init__( 30 | self, 31 | tokenizer: Any, 32 | num_examine: int, 33 | compute_score: RawRewardFn | None, 34 | reward_fn_key: str = "data_source", 35 | **kwargs: Any, 36 | ): 37 | pass 38 | 39 | @abstractmethod 40 | def __call__( 41 | self, 42 | data: DataProto, 43 | return_dict: bool = False, 44 | ) -> torch.Tensor | dict[str, Any]: 45 | pass 46 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r".*.*\\boxed\{.*\}.*", re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str, use_boxed: bool = True) -> float: 26 | if use_boxed: 27 | answer = extract_boxed_content(predict_str) 28 | else: 29 | answer = predict_str 30 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 31 | 32 | 33 | def compute_score(predict_str: str, ground_truth: str, use_boxed: bool = True, format_score: float = 0.1) -> float: 34 | return (1.0 - format_score) * acc_reward(predict_str, ground_truth, use_boxed) + format_score * format_reward( 35 | predict_str 36 | ) 37 | -------------------------------------------------------------------------------- /verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Utilities for distributed training.""" 15 | 16 | import os 17 | 18 | import torch.distributed 19 | 20 | from verl.utils.device import get_nccl_backend, get_torch_device 21 | 22 | 23 | def initialize_global_process_group(timeout_second=36000): 24 | from datetime import timedelta 25 | 26 | torch.distributed.init_process_group( 27 | get_nccl_backend(), 28 | timeout=timedelta(seconds=timeout_second), 29 | init_method=os.environ.get("DIST_INIT_METHOD", None), 30 | ) 31 | local_rank = int(os.environ["LOCAL_RANK"]) 32 | rank = int(os.environ["RANK"]) 33 | world_size = int(os.environ["WORLD_SIZE"]) 34 | 35 | if torch.distributed.is_initialized(): 36 | get_torch_device().set_device(local_rank) 37 | return local_rank, rank, world_size 38 | 39 | 40 | def destroy_global_process_group(): 41 | if torch.distributed.is_initialized(): 42 | torch.distributed.destroy_process_group() 43 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | 17 | from verl.utils.device import get_device_id 18 | 19 | 20 | class MemoryBuffer: 21 | def __init__(self, numel, numel_padded, dtype): 22 | self.numel = numel 23 | self.numel_padded = numel_padded 24 | self.dtype = dtype 25 | self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=get_device_id(), requires_grad=False) 26 | 27 | def zero(self): 28 | """Reset the buffer to zero.""" 29 | self.data.zero_() 30 | 31 | def get(self, shape, start_index): 32 | """Return a tensor with the input `shape` as a view into the 33 | 1-D data starting at `start_index`.""" 34 | end_index = start_index + shape.numel() 35 | assert end_index <= self.numel, "requested tensor is out of the buffer range." 36 | buffer_tensor = self.data[start_index:end_index] 37 | buffer_tensor = buffer_tensor.view(shape) 38 | return buffer_tensor 39 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from .vllm_rollout_spmd import vLLMAsyncRollout, vLLMRollout # noqa: F401 18 | 19 | 20 | def get_version(pkg): 21 | try: 22 | return version(pkg) 23 | except PackageNotFoundError: 24 | return None 25 | 26 | 27 | vllm_package_name = "vllm" 28 | vllm_package_version = get_version(vllm_package_name) 29 | if vllm_package_version is None: 30 | raise PackageNotFoundError( 31 | "To use vllm rollout, please ensure the 'vllm' package is properly installed. See " 32 | "https://verl.readthedocs.io/en/latest/start/install.html for more details" 33 | ) 34 | 35 | if "ROCM_PATH" in os.environ: 36 | import re 37 | 38 | match = re.match(r"(\d+\.\d+\.?\d*)", vllm_package_version) 39 | if match: 40 | vllm_package_version = match.group(1) 41 | else: 42 | raise ValueError(f"Warning: Could not parse version format: {vllm_package_version}") 43 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | try: 16 | from math_verify.errors import TimeoutException 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> bool: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | try: 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | except Exception: 35 | pass 36 | except TimeoutException: 37 | ret_score = timeout_score 38 | 39 | return ret_score 40 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- 1 | # defaults specify the default config from each component 2 | defaults: 3 | 4 | # dp ref config, inheriting from trainer/config/ref/ref.yaml 5 | - ref 6 | 7 | # load the reference default config, then apply the fields in the current yaml 8 | - _self_ 9 | 10 | # ref model is assumed to be identical to actor model. Specify model.path for using a different ref model. 
11 | # Potential use case involves on policy distillation where we calculate KL divergence between student actor 12 | # and teacher ref 13 | model: null 14 | 15 | # config for FSDP strategy 16 | fsdp_config: 17 | 18 | # Target class for this configuration 19 | _target_: verl.workers.config.FSDPEngineConfig 20 | 21 | # the wrap policy for FSDP model 22 | wrap_policy: 23 | 24 | # minimum number of params in a wrapped module 25 | min_num_params: 0 26 | 27 | # whether to offload parameters in FSDP 28 | param_offload: False 29 | 30 | # whether to perform reshard after model forward to save memory. 31 | # only for fsdp2, [True, False, int between 1 and fsdp_size] 32 | reshard_after_forward: True 33 | 34 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 35 | # before the current forward computation. 36 | forward_prefetch: False 37 | 38 | # sequence parallel size 39 | # same as actor_rollout_ref.actor.ulysses_sequence_parallel_size if it exists, otherwise 1 40 | ulysses_sequence_parallel_size: ${oc.select:actor_rollout_ref.actor.ulysses_sequence_parallel_size,1} 41 | 42 | # calculate entropy with chunking to reduce memory peak 43 | entropy_from_logits_with_chunking: False 44 | 45 | # recompute entropy 46 | entropy_checkpointing: False 47 | -------------------------------------------------------------------------------- /verl/verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..device import is_npu_available 16 | from ..import_utils import is_nvtx_available 17 | from .performance import GPUMemoryLogger, log_gpu_memory_usage, simple_timer 18 | from .profile import DistProfilerExtension, ProfilerConfig 19 | 20 | if is_nvtx_available(): 21 | from .nvtx_profile import NsightSystemsProfiler as DistProfiler 22 | from .nvtx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer 23 | elif is_npu_available: 24 | from .mstx_profile import NPUProfiler as DistProfiler 25 | from .mstx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer 26 | else: 27 | from .performance import marked_timer 28 | from .profile import DistProfiler, mark_annotate, mark_end_range, mark_start_range 29 | 30 | __all__ = [ 31 | "GPUMemoryLogger", 32 | "log_gpu_memory_usage", 33 | "mark_start_range", 34 | "mark_end_range", 35 | "mark_annotate", 36 | "DistProfiler", 37 | "DistProfilerExtension", 38 | "ProfilerConfig", 39 | "simple_timer", 40 | "marked_timer", 41 | ] 42 | -------------------------------------------------------------------------------- /verl/verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import os 17 | 18 | from ray._private.runtime_env.constants import RAY_JOB_CONFIG_JSON_ENV_VAR 19 | 20 | PPO_RAY_RUNTIME_ENV = { 21 | "env_vars": { 22 | "TOKENIZERS_PARALLELISM": "true", 23 | "NCCL_DEBUG": "WARN", 24 | "VLLM_LOGGING_LEVEL": "WARN", 25 | "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true", 26 | "CUDA_DEVICE_MAX_CONNECTIONS": "1", 27 | }, 28 | } 29 | 30 | 31 | def get_ppo_ray_runtime_env(): 32 | """ 33 | A filter function to return the PPO Ray runtime environment. 34 | To avoid repeat of some environment variables that are already set. 35 | """ 36 | working_dir = ( 37 | json.loads(os.environ.get(RAY_JOB_CONFIG_JSON_ENV_VAR, "{}")).get("runtime_env", {}).get("working_dir", None) 38 | ) 39 | 40 | runtime_env = { 41 | "env_vars": PPO_RAY_RUNTIME_ENV["env_vars"].copy(), 42 | **({"working_dir": None} if working_dir is None else {}), 43 | } 44 | for key in list(runtime_env["env_vars"].keys()): 45 | if os.environ.get(key) is not None: 46 | runtime_env["env_vars"].pop(key, None) 47 | return runtime_env 48 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/reward_model/dp_reward_model.yaml: -------------------------------------------------------------------------------- 1 | # Format checks enforced on CI: 2 | # 1. Comments must appear above each field. 3 | # 2. There must be a blank line between each field. 4 | # 3. Inline comments (after a field on the same line) are not allowed. 5 | # 4. Indentation level is respected for nested fields. 
6 | 7 | # defaults specify the default config from each component 8 | defaults: 9 | 10 | # dp actor config, inheriting from trainer/config/reward_model/reward_model.yaml 11 | - reward_model 12 | 13 | # load the reference default config, then apply the fields in the current yaml 14 | - _self_ 15 | 16 | strategy: fsdp 17 | 18 | model: 19 | 20 | # Whether to use shared memory for loading the model 21 | use_shm: False 22 | 23 | # Use remove padding optimization (saves compute) 24 | use_remove_padding: False 25 | 26 | # Whether to use fused reward kernels for speedup 27 | use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels} 28 | 29 | # FSDP-specific config 30 | fsdp_config: 31 | 32 | # Target configuration dataclass 33 | _target_: verl.workers.config.FSDPEngineConfig 34 | 35 | # Policy for wrapping layers with FSDP 36 | wrap_policy: 37 | 38 | # Minimum number of parameters to trigger wrapping 39 | min_num_params: 0 40 | 41 | # Whether to offload model parameters to CPU 42 | param_offload: False 43 | 44 | # Only for FSDP2: Reshard after forward pass to reduce memory footprint 45 | reshard_after_forward: True 46 | 47 | # Number of GPUs in each FSDP shard group; -1 means auto 48 | fsdp_size: -1 49 | 50 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 51 | # before the current forward computation. 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Flatten the LSR-Synth benchmark into one parquet file.")
    parser.add_argument(
        "--local_dir",
        default="./data/inference",
        help="Directory holding the HF dataset and lsr_bench_data.hdf5",
    )
    # Previously hard-coded to ./data/inference regardless of --local_dir;
    # exposed as an argument with the same default for backward compatibility.
    parser.add_argument(
        "--output_dir",
        default="./data/inference",
        help="Directory to write llmsrbench.parquet into",
    )

    args = parser.parse_args()
    local_dir = args.local_dir
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    benchmark_dataset = []
    sample_h5file_path = Path(local_dir) / "lsr_bench_data.hdf5"

    # Load the dataset dict once (was reloaded on every loop iteration) and
    # keep the HDF5 file open across all four domains.
    dataset_splits = datasets.load_dataset(local_dir)

    # lsr_synth: four synthetic domains, each stored as its own split.
    with h5py.File(sample_h5file_path, "r") as sample_file:
        for dataset_identifier in ["matsci", "chem_react", "bio_pop_growth", "phys_osc"]:
            ds = dataset_splits[f"lsr_synth_{dataset_identifier}"]
            for e in ds:
                # Per-equation sample arrays; keys are train / test / ood_test.
                group = sample_file[f"/lsr_synth/{dataset_identifier}/{e['name']}"]
                samples = {k: v[...].astype(np.float64) for k, v in group.items()}
                benchmark_dataset.append(
                    {
                        "dataset_identifier": f"lsr_synth/{dataset_identifier}",
                        "equation_idx": e["name"],
                        "symbols": e["symbols"],
                        "symbol_descs": e["symbol_descs"],
                        "symbol_properties": e["symbol_properties"],
                        "expression": e["expression"],
                        "samples": samples,  # dict: ['train', 'test', 'ood_test']
                    }
                )

    print(len(benchmark_dataset))
    benchmark_dataset = datasets.Dataset.from_list(benchmark_dataset)
    benchmark_dataset.to_parquet(os.path.join(output_dir, "llmsrbench.parquet"))
class BasePPORewardModel(ABC):
    """Abstract base for PPO reward models.

    Subclasses score full sequences and emit a per-token reward tensor in
    which only the [EOS] position carries the sequence's reward.
    """

    def __init__(self, config):
        # config: implementation-specific reward-model configuration object.
        self.config = config

    @abstractmethod
    def compute_reward(self, data: DataProto) -> DataProto:
        """Computing reward given input_ids. The transformers should output a tensor with shape
        [batch_size, sequence_length], and the value at [EOS] mask should be gathered.

        Args:
            data: must contain keys "input_ids", "attention_mask" and "position_ids".
                - input_ids: [batch_size, sequence_length]
                - attention_mask: [batch_size, sequence_length]
                - position_ids: [batch_size, sequence_length]

        Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
            Other position should have zero reward. Note that this may change in the future if we use
            dense reward. So, we leave the interface for general case.
            - reward: [batch_size, sequence_length].

        """
        pass
is for backward-compatibility 47 | ulysses_sequence_parallel_size: 1 # sp size 48 | entropy_from_logits_with_chunking: False # calculate entropy with chunking to reduce memory peak 49 | entropy_checkpointing: False # recompute entropy 50 | fsdp_config: 51 | fsdp_size: -1 52 | forward_prefetch: False # FSDP1 forward_prefetch configuration 53 | 54 | ray_kwargs: 55 | ray_init: 56 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 57 | timeline_json_file: null 58 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import LlamaConfig 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelLlamaRMSNorm(nn.Module): 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import Qwen2Config 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelQwen2RMSNorm(nn.Module): 27 | def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig): 28 | """ 29 | Qwen2RMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/verl/utils/metric/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Metrics utils. 16 | """ 17 | 18 | from typing import Any 19 | 20 | import numpy as np 21 | 22 | 23 | def reduce_metrics(metrics: dict[str, list[Any]]) -> dict[str, Any]: 24 | """ 25 | Reduces a dictionary of metric lists by computing the mean, max, or min of each list. 26 | The reduce operation is determined by the key name: 27 | - If the key contains "max", np.max is used 28 | - If the key contains "min", np.min is used 29 | - Otherwise, np.mean is used 30 | 31 | Args: 32 | metrics: A dictionary mapping metric names to lists of metric values. 33 | 34 | Returns: 35 | A dictionary with the same keys but with each list replaced by its reduced value. 36 | 37 | Example: 38 | >>> metrics = { 39 | ... "loss": [1.0, 2.0, 3.0], 40 | ... "accuracy": [0.8, 0.9, 0.7], 41 | ... "max_reward": [5.0, 8.0, 6.0], 42 | ... "min_error": [0.1, 0.05, 0.2] 43 | ... 
} 44 | >>> reduce_metrics(metrics) 45 | {"loss": 2.0, "accuracy": 0.8, "max_reward": 8.0, "min_error": 0.05} 46 | """ 47 | for key, val in metrics.items(): 48 | if "max" in key: 49 | metrics[key] = np.max(val) 50 | elif "min" in key: 51 | metrics[key] = np.min(val) 52 | else: 53 | metrics[key] = np.mean(val) 54 | return metrics 55 | -------------------------------------------------------------------------------- /inference/scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | PARQUET_FILE_PATH="../data/llmsrbench.parquet" 4 | OUTPUT_JSON_PATH="./output/memory_default/gpt-oss-120b-tool2_0_001_s40_max_25_top3_N1_test.json" 5 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 6 | 7 | MODEL_URL="http://0.0.0.0:30000/v1" 8 | SANDBOX_URLS=( 9 | "http://127.0.0.1:9010/run_code" 10 | "http://127.0.0.1:9020/run_code" 11 | "http://127.0.0.1:9030/run_code" 12 | "http://127.0.0.1:9040/run_code" 13 | "http://127.0.0.1:9050/run_code" 14 | "http://127.0.0.1:9060/run_code" 15 | "http://127.0.0.1:9070/run_code" 16 | "http://127.0.0.1:9080/run_code" 17 | ) 18 | 19 | 20 | MAPE_THRESHOLD=0.001 21 | NUM_TURNS=5 22 | MAX_ASSISTANT_TURNS=5 23 | TOP_K=3 24 | SOURCE=( 25 | "lsr_synth/bio_pop_growth" 26 | ) 27 | 28 | 29 | conda activate srscientist 30 | cd inference/ 31 | 32 | 33 | TIMESTAMP=$(TZ='UTC-8' date +'%Y%m%d_%H%M%S') 34 | 35 | 36 | BASENAME=$(basename "$OUTPUT_JSON_PATH" .json) 37 | 38 | # 3. Get the output directory and replace "output" with "log" 39 | OUTPUT_DIR=$(dirname "$OUTPUT_JSON_PATH") 40 | LOG_DIR="${OUTPUT_DIR/output/log}" 41 | 42 | # 4. Construct the full log file path 43 | LOG_FILE="${LOG_DIR}/${BASENAME}_${TIMESTAMP}.log" 44 | 45 | # 5. 
Create the log and output directories if they don't exist 46 | mkdir -p "$LOG_DIR" 47 | mkdir -p "$OUTPUT_DIR" 48 | 49 | 50 | 51 | mkdir -p "$(dirname "$OUTPUT_JSON_PATH")" 52 | echo "--- Starting main.py execution at $(TZ='UTC-8' date) ---" | tee -a "$LOG_FILE" 53 | 54 | python main.py \ 55 | --model-name "$MODEL_PATH" \ 56 | --model-url "$MODEL_URL" \ 57 | --sandbox-urls "${SANDBOX_URLS[@]}" \ 58 | --parquet-file-path "$PARQUET_FILE_PATH" \ 59 | --mape-threshold $MAPE_THRESHOLD \ 60 | --num-turns $NUM_TURNS \ 61 | --max-assistant-turns $MAX_ASSISTANT_TURNS \ 62 | --top-k $TOP_K \ 63 | --source "${SOURCE[@]}" \ 64 | --output-json-path "$OUTPUT_JSON_PATH" 2>&1 | tee "$LOG_FILE" 65 | 66 | echo "--- Finished main.py execution at $(TZ='UTC-8' date) ---" | tee -a "$LOG_FILE" 67 | -------------------------------------------------------------------------------- /verl/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | **/*.pt 3 | **/checkpoints 4 | **/wget-log 5 | **/_build/ 6 | **/*.ckpt 7 | **/outputs 8 | **/*.tar.gz 9 | **/playground 10 | **/wandb 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | dataset/* 17 | tensorflow/my_graph/* 18 | .idea/ 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | tmp/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *,cover 60 | .hypothesis/ 61 | pytest.ini 62 | output.txt 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # IPython Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | venv/ 99 | .venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # vscode 109 | .vscode 110 | 111 | # Mac 112 | .DS_Store 113 | 114 | # vim 115 | *.swp 116 | 117 | # ckpt 118 | *.lock 119 | 120 | # data 121 | *.parquet 122 | 123 | 124 | # local logs 125 | logs 126 | log 127 | outputs 128 | .history 129 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
    """Pad the first (token) dimension so the total length is a multiple of
    the tensor-model-parallel world size.

    Args:
        unpad_tokens: (total_nnz, ...). Tokens after removing padding.

    Returns:
        torch.Tensor: the padded tokens, shape (total_nnz + pad_size, ...).
    """
    total_nnz = unpad_tokens.shape[0]
    sp_world_size = mpu.get_tensor_model_parallel_world_size()

    # Amount needed to round total_nnz up to the next multiple of sp_world_size.
    pad_size = 0 if total_nnz % sp_world_size == 0 else sp_world_size - total_nnz % sp_world_size

    if pad_size > 0:
        # F.pad's pad spec is ordered from the LAST dimension forward, so keep
        # every trailing dimension untouched ((0, 0) pairs) and append
        # pad_size zero rows on dim 0. This handles any ndim >= 1, whereas the
        # previous version only supported 1-D/2-D and its error path crashed
        # with TypeError because it called `unpad_tokens.ndim()` — `ndim` is a
        # property, not a method.
        pad_spec = [0, 0] * (unpad_tokens.dim() - 1) + [0, pad_size]
        unpad_tokens = F.pad(unpad_tokens, pad_spec)

    return unpad_tokens
>> "$tmp_header" 24 | echo "" >> "$tmp_header" 25 | 26 | python3 scripts/print_cfg.py --cfg job ${config_arg} > "$tmp_cfg" 27 | 28 | cat "$tmp_header" > "$target_cfg" 29 | sed -n '/^actor_rollout_ref/,$p' "$tmp_cfg" >> "$target_cfg" 30 | 31 | rm "$tmp_cfg" "$tmp_header" 32 | 33 | echo "Generated: $target_cfg" 34 | } 35 | 36 | for spec in "${CONFIG_SPECS[@]}"; do 37 | IFS=':' read -r config_name output_file config_arg <<< "$spec" 38 | generate_config "$config_name" "$output_file" "$config_arg" 39 | done 40 | 41 | for spec in "${CONFIG_SPECS[@]}"; do 42 | IFS=':' read -r config_name output_file config_arg <<< "$spec" 43 | target_cfg="verl/trainer/config/${output_file}" 44 | if ! git diff --exit-code -- "$target_cfg" >/dev/null; then 45 | echo "✖ $target_cfg is out of date. Please regenerate via 'scripts/generate_trainer_config.sh' and commit the changes." 46 | exit 1 47 | fi 48 | done 49 | 50 | echo "All good" 51 | exit 0 52 | -------------------------------------------------------------------------------- /verl/verl/workers/roles/actor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class ActorWorker(Worker):
    """
    This worker can be instantiated as a standalone actor or a standalone rollout or a standalone reference policy
    or a hybrid engine based on the config.rollout
    """

    def __init__(self, config):
        # Stub: a concrete implementation is expected to build its engine
        # from `config` here. Not yet implemented.
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def init_model(self):
        """Initialize the model; registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def update_actor(self, data: DataProto):
        """Run a policy-update step on `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def compute_log_prob(self, data: DataProto):
        """Compute log-probabilities for `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def compute_ref_log_prob(self, data: DataProto):
        """Compute reference-policy log-probabilities for `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def save_checkpoint(self, local_path, hdfs_path=None, global_step=0, max_ckpt_to_keep=None):
        """Persist a checkpoint to `local_path` (optionally `hdfs_path`); registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def load_checkpoint(self, local_path, hdfs_path=None, del_local_after_load=False):
        """Restore a checkpoint from `local_path`; registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from packaging import version as vs 18 | 19 | from verl.utils.import_utils import is_sglang_available 20 | 21 | 22 | def get_version(pkg): 23 | try: 24 | return version(pkg) 25 | except PackageNotFoundError: 26 | return None 27 | 28 | 29 | package_name = "vllm" 30 | package_version = get_version(package_name) 31 | vllm_version = None 32 | 33 | if package_version is None: 34 | if not is_sglang_available(): 35 | raise ValueError( 36 | f"vllm version {package_version} not supported and SGLang also not Found. Currently supported " 37 | f"vllm versions are 0.7.0+" 38 | ) 39 | elif vs.parse(package_version) >= vs.parse("0.7.0"): 40 | vllm_version = package_version 41 | from vllm import LLM 42 | from vllm.distributed import parallel_state 43 | else: 44 | if vs.parse(package_version) in [vs.parse("0.5.4"), vs.parse("0.6.3")]: 45 | raise ValueError( 46 | f"vLLM version {package_version} support has been removed. vLLM 0.5.4 and 0.6.3 are no longer " 47 | f"supported. Please use vLLM 0.7.0 or later." 48 | ) 49 | if not is_sglang_available(): 50 | raise ValueError( 51 | f"vllm version {package_version} not supported and SGLang also not Found. 
class TokenBucket:
    """Thread-safe token-bucket rate limiter.

    The bucket holds at most ``rate_limit`` tokens and refills continuously
    at ``rate_limit`` tokens per second; each successful acquire() consumes
    exactly one token.
    """

    def __init__(self, rate_limit: float):
        # Refill rate in tokens/second; doubles as the bucket capacity.
        self.rate_limit = rate_limit
        # Start with a full bucket.
        self.tokens = rate_limit
        self.last_update = time.time()
        self.lock = threading.Lock()

    def acquire(self) -> bool:
        """Try to take one token; return False when the bucket is empty."""
        with self.lock:
            now = time.time()
            elapsed = now - self.last_update
            # Refill proportionally to the elapsed time, capped at capacity.
            self.tokens = min(self.rate_limit, self.tokens + elapsed * self.rate_limit)
            self.last_update = now
            if self.tokens < 1:
                return False
            self.tokens -= 1
            return True
def mcp2openai(mcp_tool: "Tool") -> dict:
    """Convert a MCP Tool to an OpenAI ChatCompletionTool.

    Args:
        mcp_tool: The MCP tool; its ``name``, ``description`` and
            ``inputSchema`` attributes are read.

    Returns:
        dict: An OpenAI function-tool definition. ``parameters.required`` is
        always present (defaults to an empty list when missing or falsy).
    """
    # Shallow-copy the schema so injecting the "required" default below never
    # mutates the tool's own inputSchema (the previous version shared the
    # reference and wrote into it). The `or {}` guard also tolerates a
    # missing/None schema, which used to raise AttributeError on .get().
    parameters = dict(mcp_tool.inputSchema or {})
    if not parameters.get("required", None):
        parameters["required"] = []
    return {
        "type": "function",
        "function": {
            "name": mcp_tool.name,
            "description": mcp_tool.description,
            "parameters": parameters,
            "strict": False,
        },
    }
30 | """ 31 | 32 | def decorator(cls: type[AbstractRewardManager]) -> type[AbstractRewardManager]: 33 | if name in REWARD_MANAGER_REGISTRY and REWARD_MANAGER_REGISTRY[name] != cls: 34 | raise ValueError( 35 | f"Reward manager {name} has already been registered: {REWARD_MANAGER_REGISTRY[name]} vs {cls}" 36 | ) 37 | REWARD_MANAGER_REGISTRY[name] = cls 38 | return cls 39 | 40 | return decorator 41 | 42 | 43 | def get_reward_manager_cls(name: str) -> type[AbstractRewardManager]: 44 | """Get the reward manager class with a given name. 45 | 46 | Args: 47 | name: `(str)` 48 | The name of the reward manager. 49 | 50 | Returns: 51 | `(type)`: The reward manager class. 52 | """ 53 | if name not in REWARD_MANAGER_REGISTRY: 54 | raise ValueError(f"Unknown reward manager: {name}") 55 | return REWARD_MANAGER_REGISTRY[name] 56 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/reward_model/megatron_reward_model.yaml: -------------------------------------------------------------------------------- 1 | # defaults specify the default config from each component 2 | defaults: 3 | 4 | # dp actor config, inheriting from trainer/config/reward_model/reward_model.yaml 5 | - reward_model 6 | 7 | # load the reference default config, then apply the fields in the current yaml 8 | - _self_ 9 | 10 | strategy: megatron 11 | 12 | # seconds, default is 10 minutes for torch, you can set it to a larger value 13 | # if you have long-running operations like 32B or 72B model using megatron 14 | nccl_timeout: 600 15 | 16 | # Megatron parallelism & checkpointing config 17 | megatron: 18 | 19 | # Target configuration dataclass 20 | _target_: verl.workers.config.MegatronEngineConfig 21 | 22 | # Whether to offload model parameters to CPU 23 | param_offload: False 24 | 25 | # Number of GPUs in tensor model parallel group 26 | tensor_model_parallel_size: 1 27 | 28 | # Number of GPUs in expert model parallel group 29 | expert_model_parallel_size: 1 30 | 
31 | # Expert tensor parallel size 32 | expert_tensor_parallel_size: null 33 | 34 | # Number of pipeline model parallel stages 35 | pipeline_model_parallel_size: 1 36 | 37 | # change VPP interface for parallelism tests 38 | virtual_pipeline_model_parallel_size: null 39 | 40 | # Context parallel size 41 | context_parallel_size: 1 42 | 43 | # Whether to use sequence parallelism 44 | sequence_parallel: True 45 | 46 | # Whether to use distributed optimizer 47 | use_distributed_optimizer: False 48 | 49 | # Whether to enable distributed checkpointing 50 | use_dist_checkpointing: False 51 | 52 | # Path for distributed checkpoints 53 | dist_checkpointing_path: null 54 | 55 | # RNG seed for megatron 56 | seed: ${oc.select:actor_rollout_ref.actor.megatron.seed,42} 57 | 58 | # Any overrides to transformer config 59 | override_transformer_config: ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}} 60 | 61 | # Whether to use mbridge for faster comms 62 | use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False} 63 | 64 | # Whether to load weights (default True) 65 | load_weight: True -------------------------------------------------------------------------------- /verl/verl/models/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Supported models in Megatron-LM
# Architecture -> (module, class).
_MODELS = {
    "LlamaForCausalLM": (
        "llama",
        ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad"),
    ),
    "Qwen2ForCausalLM": (
        "qwen2",
        ("ParallelQwen2ForCausalLMRmPadPP", "ParallelQwen2ForValueRmPadPP", "ParallelQwen2ForCausalLMRmPad"),
    ),
    "MistralForCausalLM": (
        "mistral",
        ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", "ParallelMistralForCausalLMRmPad"),
    ),
}


# return model class
class ModelRegistry:
    """Maps HuggingFace architecture names to Megatron parallel model classes."""

    @staticmethod
    def load_model_cls(model_arch: str, value=False) -> Optional[type[nn.Module]]:
        """Return the parallel model class for ``model_arch``, or None if unknown.

        Args:
            model_arch: HuggingFace architecture name, e.g. "LlamaForCausalLM".
            value: when True return the value-head variant (critic/rm),
                otherwise the causal-LM variant (actor/ref).
        """
        entry = _MODELS.get(model_arch)
        if entry is None:
            return None

        module_name, class_names = entry
        # Index 0 is the actor/ref class, index 1 the critic/rm class.
        cls_name = class_names[1] if value else class_names[0]

        module = importlib.import_module(f"verl.models.{module_name}.megatron.modeling_{module_name}_megatron")
        return getattr(module, cls_name, None)

    @staticmethod
    def get_supported_archs() -> list[str]:
        """Return every architecture name this registry can serve."""
        return list(_MODELS.keys())
For models larger than 7B, it’s recommended to turn on offload for ref by default 2 | strategy: ${actor_rollout_ref.actor.strategy} 3 | 4 | # whether to enable torch.compile 5 | # same as actor_rollout_ref.actor.use_torch_compile if it exists, otherwise 1 6 | use_torch_compile: ${oc.select:actor_rollout_ref.actor.use_torch_compile,true} 7 | 8 | # [Will be deprecated, use log_prob_micro_batch_size_per_gpu] 9 | # The batch size for one forward pass in the computation of log_prob. Global batch size. 10 | log_prob_micro_batch_size: null 11 | 12 | # The batch size for one forward pass in the computation of log_prob. Local batch size per GPU. 13 | log_prob_micro_batch_size_per_gpu: null 14 | 15 | # enable dynamic batch size (sequence packing) for log_prob computation 16 | # same as actor_rollout_ref.actor.use_dynamic_bsz if it exists, otherwise false 17 | log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false} 18 | 19 | # the max token length per GPU 20 | # same as actor_rollout_ref.actor.ppo_max_token_len_per_gpu if it exists, otherwise 16384 21 | log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384} 22 | 23 | # profile the ref model in `compute_log_prob` 24 | profiler: 25 | 26 | # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs 27 | _target_: verl.utils.profiler.ProfilerConfig 28 | 29 | # profiler tool, default same as profiler.tool in global config 30 | # choices: nsys, npu, torch 31 | tool: ${oc.select:global_profiler.tool,null} 32 | 33 | # whether enable profile on ref 34 | enable: ${oc.select:actor_rollout_ref.actor.profiler.enable,false} 35 | 36 | # Whether to profile all ranks. 37 | all_ranks: ${oc.select:actor_rollout_ref.actor.profiler.all_ranks,false} 38 | 39 | # The ranks that will be profiled. [] or [0,1,...] 
40 | ranks: ${oc.select:actor_rollout_ref.actor.profiler.ranks,[]} 41 | 42 | # profile results saving path 43 | save_path: ${oc.select:global_profiler.save_path,null} 44 | 45 | # specific tool config 46 | tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null} -------------------------------------------------------------------------------- /verl/.github/workflows/.deprecate/e2e_prime.yml: -------------------------------------------------------------------------------- 1 | name: e2e_prime_deprecate 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - disabled_ci 9 | pull_request: 10 | branches: 11 | - disabled_ci 12 | paths: 13 | - "**/*.py" 14 | # Other entrypoints 15 | - "!examples/**" 16 | - "!tests/**" 17 | - "!verl/trainer/main_*.py" 18 | - "!verl/trainer/fsdp_sft_trainer.py" 19 | # Other recipes 20 | - "!recipe/**" 21 | # Megatron 22 | - "!verl/workers/**/megatron_*.py" 23 | # Home 24 | - "recipe/prime" 25 | # Entrypoints 26 | - ".github/workflows/e2e_prime.yml" 27 | - "examples/data_preprocess/gsm8k.py" 28 | - "tests/special_e2e/run_prime.sh" 29 | 30 | # Cancel jobs on the same ref if a new one is triggered 31 | concurrency: 32 | group: ${{ github.workflow }}-${{ github.ref }} 33 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 34 | 35 | # Declare permissions just read content. 
36 | permissions: 37 | contents: read 38 | 39 | jobs: 40 | e2e_prime: 41 | runs-on: [L20x8] 42 | timeout-minutes: 50 # Increase this timeout value as needed 43 | env: 44 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 45 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 46 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 47 | HF_ENDPOINT: "https://hf-mirror.com" 48 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 49 | container: 50 | image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3 51 | options: --gpus all --shm-size=10g 52 | steps: 53 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 54 | with: 55 | fetch-depth: 0 56 | - name: Install the current repository 57 | run: | 58 | pip3 install --no-deps -e .[test,gpu] 59 | - name: Prepare gsm8k dataset 60 | run: | 61 | ray stop --force 62 | python3 examples/data_preprocess/gsm8k.py 63 | - name: Running GSM8K E2E with prime alg 64 | run: | 65 | ray stop --force 66 | bash tests/special_e2e/run_prime.sh 67 | -------------------------------------------------------------------------------- /verl/verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Copyright 2025 The Qwen Team and The HuggingFace Inc. team 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
import torch
import torch_npu
from torch_npu import npu_rotary_mul as apply_rotary_emb
from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2RMSNorm


# This patch takes effect when using apply_rotary_pos_emb_flashatt on qwen2_5_vl and will be removed in
# subsequent versions
# https://github.com/huggingface/transformers/pull/38491
def apply_rotary_pos_emb_flashatt_npu(
    q: torch.Tensor, k: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """NPU drop-in for Qwen2.5-VL's flash-attention rotary embedding.

    Keeps only the first half of the cos/sin tables along the last dim, then
    duplicates it, and applies torch_npu's fused `npu_rotary_mul`. The rotary
    math runs in fp32 and the result is cast back to q/k's original dtype.
    """
    # chunk(2)[0] + repeat(1, 2): take the first half of the table and tile it
    # so the layout matches what npu_rotary_mul expects.
    cos = cos.chunk(2, dim=-1)[0].contiguous()
    sin = sin.chunk(2, dim=-1)[0].contiguous()
    cos = cos.repeat(1, 2)
    sin = sin.repeat(1, 2)
    # unsqueeze(0)/unsqueeze(2) broadcast the tables over the batch/head dims
    # — assumes q/k carry those dims in that order; TODO confirm against the
    # upstream qwen2_5_vl call site.
    q_embed = apply_rotary_emb(
        q.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
    ).type_as(q)
    k_embed = apply_rotary_emb(
        k.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
    ).type_as(k)
    return q_embed, k_embed


# This api can improve performance on ASCEND NPU
def rms_norm_forward(self, x):
    """RMSNorm forward using torch_npu's fused kernel ([0] is the normalized output)."""
    return torch_npu.npu_rms_norm(x, self.weight, epsilon=self.variance_epsilon)[0]


# Module-level monkeypatches: importing this module rewires Qwen2.5-VL to the
# NPU kernels above. Import order matters — this must run before any
# Qwen2RMSNorm forward pass uses the original implementations.
Qwen2RMSNorm.forward = rms_norm_forward
modeling_qwen2_5_vl.apply_rotary_pos_emb_flashatt = apply_rotary_pos_emb_flashatt_npu
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 
def is_ipv4(ip_str: str) -> bool:
    """
    Check if the given string is an IPv4 address

    Args:
        ip_str: The IP address string to check

    Returns:
        bool: Returns True if it's an IPv4 address, False otherwise
    """
    # AddressValueError is what ipaddress raises for malformed addresses.
    try:
        ipaddress.IPv4Address(ip_str)
    except ipaddress.AddressValueError:
        return False
    return True


def is_ipv6(ip_str: str) -> bool:
    """
    Check if the given string is an IPv6 address

    Args:
        ip_str: The IP address string to check

    Returns:
        bool: Returns True if it's an IPv6 address, False otherwise
    """
    try:
        ipaddress.IPv6Address(ip_str)
    except ipaddress.AddressValueError:
        return False
    return True
14 | """ 15 | The base class for Actor 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | import torch 21 | 22 | from verl import DataProto 23 | 24 | __all__ = ["BasePPOActor"] 25 | 26 | 27 | class BasePPOActor(ABC): 28 | def __init__(self, config): 29 | """The base class for PPO actor 30 | 31 | Args: 32 | config (DictConfig): a config passed to the PPOActor. We expect the type to be 33 | DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general. 34 | """ 35 | super().__init__() 36 | self.config = config 37 | 38 | @abstractmethod 39 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 40 | """Compute logits given a batch of data. 41 | 42 | Args: 43 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 44 | ```attention_mask``` and ```position_ids```. 45 | 46 | Returns: 47 | DataProto: a DataProto containing the key ```log_probs``` 48 | 49 | 50 | """ 51 | pass 52 | 53 | @abstractmethod 54 | def update_policy(self, data: DataProto) -> dict: 55 | """Update the policy with an iterator of DataProto 56 | 57 | Args: 58 | data (DataProto): an iterator over the DataProto that returns by 59 | ```make_minibatch_iterator``` 60 | 61 | Returns: 62 | Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model 63 | such as ```loss```, ```grad_norm```, etc,. 64 | 65 | """ 66 | pass 67 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from megatron.core import dist_checkpointing, mpu 16 | from megatron.core.dist_checkpointing.serialization import ( 17 | get_default_load_sharded_strategy, 18 | get_default_save_sharded_strategy, 19 | ) 20 | from megatron.core.dist_checkpointing.strategies.fully_parallel import ( 21 | FullyParallelLoadStrategyWrapper, 22 | FullyParallelSaveStrategyWrapper, 23 | ) 24 | 25 | 26 | def save_dist_checkpointing(sharded_state_dict, ckpt_path, async_save=False): 27 | validate_sharding_integrity = True 28 | # Get checkpointing strategies 29 | save_strategy = get_default_save_sharded_strategy("torch_dist") 30 | save_strategy = FullyParallelSaveStrategyWrapper( 31 | save_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 32 | ) 33 | 34 | # Save model sharded state dicts 35 | async_save_request = dist_checkpointing.save( 36 | sharded_state_dict, 37 | ckpt_path, 38 | sharded_strategy=save_strategy, 39 | async_sharded_save=async_save, 40 | validate_access_integrity=validate_sharding_integrity, 41 | ) 42 | 43 | return async_save_request 44 | 45 | 46 | def load_dist_checkpointing(sharded_state_dict, ckpt_dir): 47 | # Get checkpointing strategies 48 | load_strategy = get_default_load_sharded_strategy(ckpt_dir) 49 | load_strategy = FullyParallelLoadStrategyWrapper( 50 | load_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 51 | ) 52 | 53 | # Load model sharded state dicts 54 | state_dict = dist_checkpointing.load(sharded_state_dict, ckpt_dir, sharded_strategy=load_strategy) 55 | 56 | return state_dict 
57 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Borrowed from: https://huggingface.co/spaces/codeparrot/apps_metric/blob/main/utils.py 16 | 17 | import multiprocessing 18 | import os 19 | import sys 20 | import traceback 21 | from typing import Optional 22 | 23 | from .testing_util import run_test 24 | 25 | 26 | def _temp_run(sample, generation, debug, result, metadata_list, timeout): 27 | with open(os.devnull, "w") as devnull: 28 | sys.stdout = devnull 29 | sys.stderr = devnull 30 | try: 31 | res, metadata = run_test(in_outs=sample, test=generation, debug=debug, timeout=timeout) 32 | result.append(res) 33 | metadata_list.append(metadata) 34 | except Exception: 35 | # print(e) # some tracebacks are extremely long. 36 | traceback.print_exc(10) 37 | result.append([-1 for i in range(len(sample["inputs"]))]) 38 | metadata_list.append({}) 39 | 40 | 41 | def check_correctness(in_outs: Optional[dict], generation, timeout=10, debug=True): 42 | """Check correctness of code generation with a global timeout. 
43 | The global timeout is to catch some extreme/rare cases not handled by the timeouts 44 | inside `run_test`""" 45 | 46 | manager = multiprocessing.Manager() 47 | result = manager.list() 48 | metadata_list = manager.list() 49 | p = multiprocessing.Process(target=_temp_run, args=(in_outs, generation, debug, result, metadata_list, timeout)) 50 | p.start() 51 | p.join(timeout=timeout + 1) 52 | if p.is_alive(): 53 | p.kill() 54 | # p.terminate() 55 | if not result: 56 | # consider that all tests failed 57 | result = [[-1 for i in range(len(in_outs["inputs"]))]] 58 | if debug: 59 | print("global timeout") 60 | return result[0], metadata_list 61 | -------------------------------------------------------------------------------- /verl/examples/sglang_multiturn/tool_config.yaml: -------------------------------------------------------------------------------- 1 | tools: 2 | - class_name: "verl.tools.equation_evaluator_tool.EquationEvaluatorTool" 3 | config: 4 | sandbox_fusion_urls: 5 | - "http://0.0.0.0:8010/run_code" 6 | - "http://0.0.0.0:8020/run_code" 7 | - "http://0.0.0.0:8030/run_code" 8 | - "http://0.0.0.0:8040/run_code" 9 | - "http://0.0.0.0:8050/run_code" 10 | - "http://0.0.0.0:8060/run_code" 11 | - "http://0.0.0.0:8070/run_code" 12 | - "http://0.0.0.0:8080/run_code" 13 | default_timeout: 30 14 | default_language: "python" 15 | memory_limit_mb: 1024 16 | type: native 17 | 18 | tool_schema: 19 | type: "function" 20 | function: 21 | name: "equation_evaluator" 22 | description: "Accepts a mathematical equation as a Python function string, optimizes its parameters to fit a dataset using the BFGS method, and returns performance metrics (MSE, NMSE, MAPE) to evaluate its goodness of fit." 23 | parameters: 24 | type: "object" 25 | properties: 26 | equation: 27 | type: "string" 28 | description: "The equation to evaluate, provided as a complete Python function string." 
29 | required: ["equation"] 30 | 31 | 32 | - class_name: "verl.tools.data_analyzer_tool.DataAnalyzerTool" 33 | config: 34 | sandbox_fusion_urls: 35 | - "http://0.0.0.0:8010/run_code" 36 | - "http://0.0.0.0:8020/run_code" 37 | - "http://0.0.0.0:8030/run_code" 38 | - "http://0.0.0.0:8040/run_code" 39 | - "http://0.0.0.0:8050/run_code" 40 | - "http://0.0.0.0:8060/run_code" 41 | - "http://0.0.0.0:8070/run_code" 42 | - "http://0.0.0.0:8080/run_code" 43 | default_timeout: 30 44 | default_language: "python" 45 | memory_limit_mb: 1024 46 | type: native 47 | 48 | tool_schema: 49 | type: "function" 50 | function: 51 | name: "data_analyzer" 52 | description: "Executes Python code for data analysis and exploration on a given dataset to inspect for relationships or anomalies. This tool does not support data visualization or plotting libraries like Matplotlib." 53 | parameters: 54 | type: "object" 55 | properties: 56 | code: 57 | type: "string" 58 | description: "The Python code snippet for data analysis to execute." 59 | required: ["code"] 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /verl/.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | ### Adding a New Workflow 2 | 3 | When adding a new workflow for continuous integration (CI), you have two runner options: a fixed runner or a machine from the vemlp. 4 | 5 | - **Fixed Runner**: To use a fixed runner, specify it in your workflow using the `runs-on` keyword, like `runs-on: [L20x8]`. 6 | - **Vemlp Runner**: Opting for a Vemlp machine allows you to launch tasks elastically. 7 | 8 | Here is a template to assist you. This template is designed for using Vemlp machines. Currently, for each workflow, you need to create a `setup` and a `cleanup` job. When using this template, the main parts you need to modify are the `IMAGE` environment variable and the specific `job steps`. 
9 | 10 | ```yaml 11 | name: Your Default Workflow 12 | 13 | on: 14 | push: 15 | branches: 16 | - main 17 | - v0.* 18 | pull_request: 19 | branches: 20 | - main 21 | - v0.* 22 | paths: 23 | - "**/*.py" 24 | - ".github/workflows/template.yml" 25 | 26 | concurrency: 27 | group: ${{ github.workflow }}-${{ github.ref }} 28 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 29 | 30 | permissions: 31 | contents: read 32 | 33 | env: 34 | IMAGE: "your vemlp image" # e.g. "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.4-vllm0.8.5-mcore0.12.2" 35 | DYNAMIC_RUNNER_URL: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner" # public veFaas api 36 | 37 | jobs: 38 | setup: 39 | if: github.repository_owner == 'volcengine' 40 | runs-on: ubuntu-latest 41 | outputs: 42 | runner-label: ${{ steps.create-runner.outputs.runner-label }} 43 | task-id: ${{ steps.create-runner.outputs.task-id }} 44 | steps: 45 | - uses: actions/checkout@v4 46 | - id: create-runner 47 | uses: volcengine/vemlp-github-runner@v1 48 | with: 49 | mode: "create" 50 | faas-url: "${{ env.DYNAMIC_RUNNER_URL }}" 51 | image: "${{ env.DEFAULT_IMAGE }}" 52 | 53 | your_job: 54 | needs: setup 55 | runs-on: ["${{ needs.setup.outputs.runner-label || 'default-runner' }}"] 56 | steps: 57 | xxxx # your jobs 58 | 59 | cleanup: 60 | runs-on: ubuntu-latest 61 | needs: [setup, your_job] 62 | if: always() 63 | steps: 64 | - id: destroy-runner 65 | uses: volcengine/vemlp-github-runner@v1 66 | with: 67 | mode: "destroy" 68 | faas-url: "${{ env.DYNAMIC_RUNNER_URL }}" 69 | task-id: "${{ needs.setup.outputs.task-id }}" -------------------------------------------------------------------------------- /verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib 16 | import logging 17 | import os 18 | from importlib.metadata import PackageNotFoundError 19 | from importlib.metadata import version as get_version 20 | 21 | from packaging.version import parse as parse_version 22 | 23 | from .protocol import DataProto 24 | from .utils.device import is_npu_available 25 | from .utils.logging_utils import set_basic_config 26 | 27 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 28 | 29 | with open(os.path.join(version_folder, "version/version")) as f: 30 | __version__ = f.read().strip() 31 | 32 | 33 | set_basic_config(level=logging.WARNING) 34 | 35 | 36 | __all__ = ["DataProto", "__version__"] 37 | 38 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true": 39 | if importlib.util.find_spec("modelscope") is None: 40 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`") 41 | # Patch hub to download models from modelscope to speed up. 
42 | from modelscope.utils.hf_util import patch_hub 43 | 44 | patch_hub() 45 | 46 | if is_npu_available: 47 | from .models.transformers import npu_patch as npu_patch 48 | 49 | package_name = "transformers" 50 | required_version_spec = "4.52.4" 51 | try: 52 | installed_version = get_version(package_name) 53 | installed = parse_version(installed_version) 54 | required = parse_version(required_version_spec) 55 | 56 | if installed < required: 57 | raise ValueError( 58 | f"{package_name} version >= {required_version_spec} is required on ASCEND NPU, current version is " 59 | f"{installed}." 60 | ) 61 | except PackageNotFoundError as e: 62 | raise ImportError( 63 | f"package {package_name} is not installed, please run pip install {package_name}=={required_version_spec}" 64 | ) from e 65 | -------------------------------------------------------------------------------- /verl/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Adapted from Cruise. 16 | """ 17 | 18 | import torch 19 | 20 | HALF_LIST = [16, "16", "fp16", "float16", torch.float16] 21 | FLOAT_LIST = [32, "32", "fp32", "float32", torch.float32] 22 | BFLOAT_LIST = ["bf16", "bfloat16", torch.bfloat16] 23 | 24 | 25 | class PrecisionType: 26 | """Type of precision used. 
27 | 28 | >>> PrecisionType.HALF == 16 29 | True 30 | >>> PrecisionType.HALF in (16, "16") 31 | True 32 | """ 33 | 34 | HALF = "16" 35 | FLOAT = "32" 36 | FULL = "64" 37 | BFLOAT = "bf16" 38 | MIXED = "mixed" 39 | 40 | @staticmethod 41 | def supported_type(precision: str | int) -> bool: 42 | return any(x == precision for x in PrecisionType) 43 | 44 | @staticmethod 45 | def supported_types() -> list[str]: 46 | return [x.value for x in PrecisionType] 47 | 48 | @staticmethod 49 | def is_fp16(precision): 50 | return precision in HALF_LIST 51 | 52 | @staticmethod 53 | def is_fp32(precision): 54 | return precision in FLOAT_LIST 55 | 56 | @staticmethod 57 | def is_bf16(precision): 58 | return precision in BFLOAT_LIST 59 | 60 | @staticmethod 61 | def to_dtype(precision): 62 | if precision in HALF_LIST: 63 | return torch.float16 64 | elif precision in FLOAT_LIST: 65 | return torch.float32 66 | elif precision in BFLOAT_LIST: 67 | return torch.bfloat16 68 | else: 69 | raise RuntimeError(f"unexpected precision: {precision}") 70 | 71 | @staticmethod 72 | def to_str(precision): 73 | if precision == torch.float16: 74 | return "fp16" 75 | elif precision == torch.float32: 76 | return "fp32" 77 | elif precision == torch.bfloat16: 78 | return "bf16" 79 | else: 80 | raise RuntimeError(f"unexpected precision: {precision}") 81 | -------------------------------------------------------------------------------- /verl/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def get_weight_loader(arch: str): 17 | from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel 18 | 19 | _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = { 20 | "LlamaForCausalLM": load_state_dict_to_megatron_gptmodel, 21 | "Qwen2ForCausalLM": load_state_dict_to_megatron_gptmodel, 22 | } 23 | 24 | if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY: 25 | return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch] 26 | raise ValueError( 27 | f"Model architectures {arch} loader are not supported for now. Supported architectures: " 28 | f"{_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}" 29 | ) 30 | 31 | 32 | def get_weight_saver(arch: str): 33 | from verl.models.mcore.saver import ( 34 | merge_megatron_ckpt_gptmodel, 35 | merge_megatron_ckpt_gptmodel_dpskv3, 36 | merge_megatron_ckpt_gptmodel_mixtral, 37 | merge_megatron_ckpt_gptmodel_qwen2_5_vl, 38 | merge_megatron_ckpt_gptmodel_qwen_moe, 39 | ) 40 | 41 | _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY = { 42 | "LlamaForCausalLM": merge_megatron_ckpt_gptmodel, 43 | "Qwen2ForCausalLM": merge_megatron_ckpt_gptmodel, 44 | "MixtralForCausalLM": merge_megatron_ckpt_gptmodel_mixtral, 45 | "Qwen2MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 46 | "Qwen2_5_VLForConditionalGeneration": merge_megatron_ckpt_gptmodel_qwen2_5_vl, 47 | "DeepseekV3ForCausalLM": merge_megatron_ckpt_gptmodel_dpskv3, 48 | "Qwen3ForCausalLM": merge_megatron_ckpt_gptmodel, 49 | "Qwen3MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 50 | } 51 | if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY: 52 
| return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch] 53 | raise ValueError( 54 | f"Model architectures {arch} saver are not supported for now. Supported architectures: " 55 | f"{_MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY.keys()}" 56 | ) 57 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/fsdp/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from torch.distributed.device_mesh import init_device_mesh 15 | 16 | from verl.utils.device import get_device_name 17 | 18 | 19 | def create_device_mesh(world_size, fsdp_size): 20 | """ 21 | Create a device mesh for distributed training based on the world size and FSDP size. 22 | 23 | Args: 24 | world_size (int): Total number of processes in the distributed training setup. 25 | fsdp_size (int): Size of the Fully Sharded Data Parallel (FSDP) group. 26 | 27 | Returns: 28 | torch.distributed.device_mesh.DeviceMesh: The initialized device mesh. 
29 | """ 30 | device_name = get_device_name() 31 | if fsdp_size < 0 or fsdp_size >= world_size: 32 | device_mesh = init_device_mesh(device_name, mesh_shape=(world_size,), mesh_dim_names=["fsdp"]) 33 | else: 34 | device_mesh = init_device_mesh( 35 | device_name, mesh_shape=(world_size // fsdp_size, fsdp_size), mesh_dim_names=["ddp", "fsdp"] 36 | ) 37 | return device_mesh 38 | 39 | 40 | def get_sharding_strategy(device_mesh): 41 | """ 42 | Determine the appropriate sharding strategy based on the number of dimensions of the device mesh. 43 | 44 | Args: 45 | device_mesh (torch.distributed.device_mesh.DeviceMesh): The device mesh used for distributed training. 46 | 47 | Returns: 48 | torch.distributed.fsdp.ShardingStrategy: The sharding strategy to be used with FSDP. 49 | 50 | Raises: 51 | NotImplementedError: If the number of dimensions of the device mesh is neither 1 nor 2. 52 | """ 53 | from torch.distributed.fsdp import ShardingStrategy 54 | 55 | if device_mesh.ndim == 1: 56 | sharding_strategy = ShardingStrategy.FULL_SHARD 57 | elif device_mesh.ndim == 2: 58 | sharding_strategy = ShardingStrategy.HYBRID_SHARD 59 | else: 60 | raise NotImplementedError(f"Get device mesh ndim={device_mesh.ndim}, but only support 1 or 2") 61 | return sharding_strategy 62 | -------------------------------------------------------------------------------- /verl/verl/model_merger/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | This module is used to merge huggingface model and test verl checkpoints from FSDP and Megatron backends. 17 | 18 | To merge FSDP checkpoints: 19 | ```sh 20 | python -m verl.model_merger merge \ 21 | --backend fsdp \ 22 | --local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \ 23 | --target_dir /path/to/merged_hf_model 24 | ``` 25 | 26 | To merge Megatron checkpoints: 27 | ```sh 28 | python -m verl.model_merger merge \ 29 | --backend megatron \ 30 | --tie-word-embedding \ 31 | --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \ 32 | --target_dir /path/to/merged_hf_model 33 | ``` 34 | 35 | or use distribtued merge for large models like dpskv3 671B 36 | 37 | ```sh 38 | torchrun --nproc_per_node 1 --nnodes 8 --node_rank ${RANK} -m verl.model_merger merge\ 39 | --backend megatron \ 40 | --local_dir ./checkpoints/global_step_1/actor \ 41 | --target_dir /path/to/merged_hf_model 42 | ``` 43 | 44 | 45 | For more details, please refer to documentation: 46 | https://verl.readthedocs.io/en/latest/advance/checkpoint.html#convert-fsdp-and-megatron-checkpoints-to-huggingface-format-model 47 | """ 48 | 49 | from .base_model_merger import generate_config_from_args, parse_args 50 | 51 | 52 | def main(): 53 | args = parse_args() 54 | config = generate_config_from_args(args) 55 | print(f"config: {config}") 56 | 57 | if config.backend == "fsdp": 58 | from .fsdp_model_merger import FSDPModelMerger 59 | 60 | merger = 
FSDPModelMerger(config) 61 | elif config.backend == "megatron": 62 | from .megatron_model_merger import MegatronModelMerger 63 | 64 | merger = MegatronModelMerger(config) 65 | else: 66 | raise NotImplementedError(f"Unknown backend: {config.backend}") 67 | 68 | merger.merge_and_save() 69 | merger.cleanup() 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /verl/scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | USE_MEGATRON=${USE_MEGATRON:-1} 4 | USE_SGLANG=${USE_SGLANG:-1} 5 | 6 | export MAX_JOBS=32 7 | 8 | echo "1. install inference frameworks and pytorch they need" 9 | if [ $USE_SGLANG -eq 1 ]; then 10 | pip install "sglang[all]==0.4.6.post1" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir 11 | fi 12 | pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata 13 | 14 | echo "2. install basic packages" 15 | pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \ 16 | "numpy<2.0.0" "pyarrow>=15.0.0" pandas \ 17 | ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \ 18 | pytest py-spy pyext pre-commit ruff tensorboard 19 | 20 | pip install "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1" 21 | 22 | 23 | echo "3. 
install FlashAttention and FlashInfer" 24 | # Install flash-attn-2.7.4.post1 (cxx11abi=False) 25 | wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \ 26 | pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl 27 | 28 | # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False) 29 | # vllm-0.8.3 does not support flashinfer>=0.2.3 30 | # see https://github.com/vllm-project/vllm/pull/15777 31 | wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \ 32 | pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl 33 | 34 | 35 | if [ $USE_MEGATRON -eq 1 ]; then 36 | echo "4. install TransformerEngine and Megatron" 37 | echo "Notice that TransformerEngine installation can take very long time, please be patient" 38 | NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1 39 | pip3 install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2 40 | fi 41 | 42 | 43 | echo "5. May need to fix opencv" 44 | pip install opencv-python 45 | pip install opencv-fixer && \ 46 | python -c "from opencv_fixer import AutoFix; AutoFix()" 47 | 48 | 49 | if [ $USE_MEGATRON -eq 1 ]; then 50 | echo "6. 
Install cudnn python package (avoid being overridden)" 51 | pip install nvidia-cudnn-cu12==9.8.0.87 52 | fi 53 | 54 | echo "Successfully installed all packages" 55 | -------------------------------------------------------------------------------- /verl/.github/workflows/e2e_sppo.yml: -------------------------------------------------------------------------------- 1 | name: e2e_sppo 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.* 10 | paths: 11 | - "**/*.py" 12 | # Other entrypoints 13 | - "!examples/**" 14 | - "!tests/**" 15 | - "!verl/trainer/main_*.py" 16 | - "!verl/trainer/fsdp_sft_trainer.py" 17 | # Other recipes 18 | - "!recipe/**" 19 | # Megatron 20 | - "!verl/workers/**/megatron_*.py" 21 | # Home 22 | - "recipe/sppo" 23 | # Entrypoints 24 | - ".github/workflows/e2e_sppo.yml" 25 | - "examples/data_preprocess/gsm8k.py" 26 | - "tests/special_e2e/run_sppo.sh" 27 | pull_request: 28 | branches: 29 | - main 30 | - v0.* 31 | paths: 32 | - "**/*.py" 33 | # Other entrypoints 34 | - "!examples/**" 35 | - "!tests/**" 36 | - "!verl/trainer/main_*.py" 37 | - "!verl/trainer/fsdp_sft_trainer.py" 38 | # Other recipes 39 | - "!recipe/**" 40 | # Megatron 41 | - "!verl/workers/**/megatron_*.py" 42 | # Home 43 | - "recipe/sppo" 44 | # Entrypoints 45 | - ".github/workflows/e2e_sppo.yml" 46 | - "examples/data_preprocess/gsm8k.py" 47 | - "tests/special_e2e/run_sppo.sh" 48 | 49 | # Declare permissions just read content. 
50 | permissions: 51 | contents: read 52 | 53 | # Cancel jobs on the same ref if a new one is triggered 54 | concurrency: 55 | group: ${{ github.workflow }}-${{ github.ref }} 56 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 57 | 58 | jobs: 59 | e2e_sppo: 60 | runs-on: [L20x8] 61 | timeout-minutes: 40 # Increase this timeout value as needed 62 | env: 63 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 64 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 65 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 66 | HF_ENDPOINT: "https://hf-mirror.com" 67 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 68 | container: 69 | image: verlai/verl:app-verl0.5-sglang0.4.9.post6-mcore0.12.2-te2.2 70 | options: --gpus all --shm-size=10g 71 | steps: 72 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 73 | with: 74 | fetch-depth: 0 75 | - name: Install the current repository 76 | run: | 77 | pip3 install -e .[test,gpu,sglang] 78 | - name: Prepare MATH dataset 79 | run: | 80 | python3 examples/data_preprocess/math_dataset.py 81 | - name: Running the E2E test with the SPPO algorithm 82 | run: | 83 | ray stop --force 84 | bash tests/special_e2e/run_sppo.sh 85 | -------------------------------------------------------------------------------- /verl/verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import logging 15 | import os 16 | from typing import Any 17 | from uuid import uuid4 18 | 19 | from verl.experimental.agent_loop.agent_loop import AgentLoopBase, AgentLoopOutput, register 20 | from verl.utils.profiler import simple_timer 21 | 22 | logger = logging.getLogger(__file__) 23 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) 24 | 25 | 26 | @register("single_turn_agent") 27 | class SingleTurnAgentLoop(AgentLoopBase): 28 | """Naive agent loop that only do single turn chat completion.""" 29 | 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | self.prompt_length = self.config.actor_rollout_ref.rollout.prompt_length 33 | self.response_length = self.config.actor_rollout_ref.rollout.response_length 34 | self.apply_chat_template_kwargs = self.config.data.get("apply_chat_template_kwargs", {}) 35 | 36 | async def run(self, sampling_params: dict[str, Any], **kwargs) -> AgentLoopOutput: 37 | messages = list(kwargs["raw_prompt"]) 38 | 39 | metrics = {} 40 | request_id = uuid4().hex 41 | prompt_ids = await self.loop.run_in_executor( 42 | None, 43 | lambda: self.tokenizer.apply_chat_template( 44 | messages, add_generation_prompt=True, tokenize=True, **self.apply_chat_template_kwargs 45 | ), 46 | ) 47 | 48 | with simple_timer("generate_sequences", metrics): 49 | response_ids = await self.server_manager.generate( 50 | request_id=request_id, prompt_ids=prompt_ids, sampling_params=sampling_params 51 | ) 52 | response_mask = [1] * len(response_ids) 53 | 54 | output = AgentLoopOutput( 55 | prompt_ids=prompt_ids, 56 | response_ids=response_ids[: self.response_length], 57 | response_mask=response_mask[: self.response_length], 58 | multi_modal_data={}, 59 | num_turns=2, 60 | metrics=metrics, 61 | ) 62 | return output 63 | -------------------------------------------------------------------------------- 
/verl/verl/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- 1 | # Format checks enforced on CI: 2 | # 1. Comments must appear above each field. 3 | # 2. There must be a blank line between each field. 4 | # 3. Inline comments (after a field on the same line) are not allowed. 5 | # 4. Indentation level is respected for nested fields. 6 | 7 | # defaults specify the default config from each component 8 | defaults: 9 | 10 | # dp actor config, inheriting from trainer/config/actor/actor.yaml 11 | - actor 12 | 13 | # load the reference default config, then apply the fields in the current yaml 14 | - _self_ 15 | 16 | # Target class for this configuration 17 | _target_: verl.workers.config.FSDPActorConfig 18 | 19 | # TODO(haibin.lin): switch to fsdp2 20 | strategy: fsdp 21 | 22 | # Gradient clipping for actor updates, specific to the strategy. 23 | grad_clip: 1.0 24 | 25 | # Sequence parallelism size for Ulysses-style model parallelism 26 | # oc.select: the default val for ref.ulysses_sequence_parallel_size 27 | ulysses_sequence_parallel_size: 1 28 | 29 | # calculate entropy with chunking to reduce memory peak 30 | entropy_from_logits_with_chunking: False 31 | 32 | # recompute entropy 33 | entropy_checkpointing: False 34 | 35 | # optimizer configs 36 | optim: 37 | 38 | # Target class for this configuration 39 | _target_: verl.workers.config.FSDPOptimizerConfig 40 | 41 | # Minimum LR ratio for cosine schedule 42 | min_lr_ratio: 0.0 43 | 44 | # Number of cosine cycles in LR schedule 45 | num_cycles: 0.5 46 | 47 | # LR warmup style: "constant" or "cosine" 48 | warmup_style: constant 49 | 50 | # configs for FSDP 51 | fsdp_config: 52 | 53 | # Target class for this configuration 54 | _target_: verl.workers.config.FSDPEngineConfig 55 | 56 | # policy for wrapping the model 57 | wrap_policy: 58 | 59 | # Minimum number of parameters to trigger wrapping a layer with FSDP 60 | min_num_params: 0 61 | 62 | # Whether to 
offload model parameters to CPU (trades speed for memory) 63 | param_offload: false 64 | 65 | # Whether to offload optimizer state to CPU 66 | optimizer_offload: false 67 | 68 | # Only for FSDP2: offload param/grad/optimizer during train 69 | offload_policy: false 70 | 71 | # Only for FSDP2: Reshard after forward pass to reduce memory footprint 72 | reshard_after_forward: true 73 | 74 | # Number of GPUs in each FSDP shard group; -1 means auto 75 | fsdp_size: -1 76 | 77 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 78 | # before the current forward computation. 79 | forward_prefetch: False 80 | 81 | # Whether to remove padding tokens in inputs during training 82 | use_remove_padding: ${oc.select:actor_rollout_ref.model.use_remove_padding,false} 83 | -------------------------------------------------------------------------------- /verl/.github/workflows/check-pr-title.yml: -------------------------------------------------------------------------------- 1 | # # Tests layout 2 | 3 | # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance: 4 | # - `tests/trainer` for testing functionality related to `verl/trainer` 5 | # - `tests/models` for testing functionality related to `verl/models` 6 | # - ... 7 | 8 | # There are a few folders with `special_` prefix, created for special purposes: 9 | # - `special_distributed`: unit tests that must run with multiple GPUs 10 | # - `special_e2e`: end-to-end tests with training/generation scripts 11 | # - `special_npu`: tests for NPUs 12 | # - `special_sanity`: a suite of quick sanity tests 13 | # - `special_standalone`: a set of test that are designed to run in dedicated environments 14 | 15 | # Accelerators for tests 16 | # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`. 
17 | # - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment. 18 | 19 | # # Workflow layout 20 | 21 | # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs: 22 | # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml` 23 | # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml` 24 | # 3. End-to-end tests: `e2e_*.yml` 25 | # 4. Unit tests 26 | # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py` 27 | # - `gpu_unit_tests.yml`, run pytest on all test scripts without the `on_cpu.py` suffix. 28 | # - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when 29 | # - new workflow yaml is added to `.github/workflows` 30 | # - new tests are added to workflow mentioned in 2.
31 | 32 | 33 | on: 34 | pull_request: 35 | types: [opened, edited, synchronize] 36 | 37 | jobs: 38 | check-title: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - name: Checkout code 42 | uses: actions/checkout@v4 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v5 46 | with: 47 | python-version: '3.11' 48 | 49 | - name: Run PR title checker 50 | run: python3 tests/special_sanity/check_pr_title.py 51 | env: 52 | PR_TITLE: ${{ github.event.pull_request.title }} 53 | 54 | - name: Run PR description checker 55 | run: python3 tests/special_sanity/check_pr_description.py 56 | env: 57 | PR_TITLE: ${{ github.event.pull_request.title }} 58 | GITHUB_EVENT_PATH: ${{ github.event_path }} 59 | -------------------------------------------------------------------------------- /verl/.github/workflows/e2e_spin.yml: -------------------------------------------------------------------------------- 1 | name: e2e_spin 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.* 10 | paths: 11 | - "**/*.py" 12 | # Other entrypoints 13 | - "!examples/**" 14 | - "!tests/**" 15 | - "!verl/trainer/main_*.py" 16 | - "!verl/trainer/fsdp_sft_trainer.py" 17 | # Other recipes 18 | - "!recipe/**" 19 | # Megatron 20 | - "!verl/workers/**/megatron_*.py" 21 | # Home 22 | - "recipe/spin" 23 | # Entrypoints 24 | - ".github/workflows/e2e_spin.yml" 25 | - "examples/data_preprocess/gsm8k.py" 26 | - "tests/special_e2e/run_spin.sh" 27 | - "!examples" 28 | pull_request: 29 | branches: 30 | - main 31 | - v0.* 32 | paths: 33 | - "**/*.py" 34 | # Other entrypoints 35 | - "!examples/**" 36 | - "!tests/**" 37 | - "!verl/trainer/main_*.py" 38 | - "!verl/trainer/fsdp_sft_trainer.py" 39 | # Other recipes 40 | - "!recipe/**" 41 | # Megatron 42 | - "!verl/workers/**/megatron_*.py" 43 | # Home 44 | - "recipe/spin" 45 | # Entrypoints 46 | - ".github/workflows/e2e_spin.yml" 47 | - "examples/data_preprocess/gsm8k.py" 48 | - 
"tests/special_e2e/run_spin.sh" 49 | - "!examples" 50 | 51 | # Declare permissions just read content. 52 | permissions: 53 | contents: read 54 | 55 | # Cancel jobs on the same ref if a new one is triggered 56 | concurrency: 57 | group: ${{ github.workflow }}-${{ github.ref }} 58 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 59 | 60 | jobs: 61 | e2e_spin: 62 | runs-on: [L20x8] 63 | timeout-minutes: 40 # Increase this timeout value as needed 64 | env: 65 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 66 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 67 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 68 | HF_ENDPOINT: "https://hf-mirror.com" 69 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 70 | container: 71 | image: verlai/verl:app-verl0.5-sglang0.4.9.post6-mcore0.12.2-te2.2 72 | options: --gpus all --shm-size=10g 73 | steps: 74 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 75 | with: 76 | fetch-depth: 0 77 | - name: Install the current repository 78 | run: | 79 | pip3 install -e .[test,gpu,sglang] 80 | - name: Prepare GSM8K dataset 81 | run: | 82 | python3 examples/data_preprocess/gsm8k.py 83 | - name: Running the E2E test with the spin algorithm 84 | run: | 85 | ray stop --force 86 | bash tests/special_e2e/run_spin.sh 87 | -------------------------------------------------------------------------------- /verl/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### What does this PR do? 2 | 3 | > Add **concise** overview of what this PR aims to achieve or accomplish. Reference related GitHub issues and PRs that help with the review. 4 | 5 | ### Checklist Before Starting 6 | 7 | - [ ] Search for similar PRs. Paste at least one query link here: ... 
8 | - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) 9 | - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` 10 | - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` 11 | - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` 12 | - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. 13 | - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` 14 | 15 | ### Test 16 | 17 | > For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc. 18 | 19 | ### API and Usage Example 20 | 21 | > Demonstrate how the API changes if any, and provide usage example(s) if possible. 22 | 23 | ```python 24 | # Add code snippet or script demonstrating how to use this 25 | ``` 26 | 27 | ### Design & Code Changes 28 | 29 | > Demonstrate the high-level design if this PR is complex, and list the specific changes. 30 | 31 | ### Checklist Before Submitting 32 | 33 | > [!IMPORTANT] 34 | > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. 35 | 36 | - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). 37 | - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` 38 | - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). 
39 | - [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... 40 | - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).) 41 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/linear.py


from megatron.core import tensor_parallel


class QKVParallelLinear(tensor_parallel.ColumnParallelLinear):
    """Column-parallel linear that projects hidden states into fused Q/K/V.

    The fused output dimension is (num_heads + 2 * num_key_value_heads) * head_dim,
    i.e. all query heads plus one key block and one value block (GQA-friendly).
    """

    def __init__(
        self,
        input_size,
        num_heads,
        num_key_value_heads,
        head_dim,
        *,
        bias=True,
        gather_output=True,
        skip_bias_add=False,
        **kwargs,
    ):
        # Stash the head geometry before the parent initializer runs; callers
        # use q_output_size / kv_output_size to split the fused projection.
        self.input_size = input_size
        self.q_output_size = num_heads * head_dim
        self.kv_output_size = num_key_value_heads * head_dim
        self.head_dim = head_dim
        self.gather_output = gather_output
        self.skip_bias_add = skip_bias_add

        fused_output_size = (num_heads + 2 * num_key_value_heads) * head_dim
        super().__init__(
            input_size=input_size,
            output_size=fused_output_size,
            bias=bias,
            gather_output=gather_output,
            skip_bias_add=skip_bias_add,
            **kwargs,
        )


class MergedColumnParallelLinear(tensor_parallel.ColumnParallelLinear):
    """Column-parallel linear fusing the gate and up projections of an MLP."""

    # NOTE(review): the parameter name `gate_ouput_size` carries a typo but is
    # part of the public keyword interface, so it is kept for compatibility.
    def __init__(
        self,
        input_size,
        gate_ouput_size,
        up_output_size,
        *,
        bias=True,
        gather_output=True,
        skip_bias_add=False,
        **kwargs,
    ):
        # Record the merged sizes before the parent initializer runs.
        self.input_size = input_size
        self.output_size = gate_ouput_size + up_output_size
        self.gather_output = gather_output
        self.skip_bias_add = skip_bias_add

        super().__init__(
            input_size=self.input_size,
            output_size=self.output_size,
            bias=bias,
            gather_output=gather_output,
            skip_bias_add=skip_bias_add,
            **kwargs,
        )
import logging
import time

import ray
from cupy.cuda.nccl import NcclCommunicator, get_unique_id
from ray.util import list_named_actors


@ray.remote
class NCCLIDStore:
    """Named Ray actor that holds a NCCL unique id for peer ranks to fetch."""

    def __init__(self, nccl_id):
        self._nccl_id = nccl_id

    def get(self):
        """Return the stored NCCL unique id."""
        return self._nccl_id


def get_nccl_id_store_by_name(name):
    """Find the NCCLIDStore actor registered under ``name``.

    Args:
        name: Actor name used when the store was created.

    Returns:
        An actor handle if exactly one actor matches, otherwise None
        (ambiguous or missing names are logged).
    """
    all_actors = list_named_actors(all_namespaces=True)
    matched_actors = [actor for actor in all_actors if actor.get("name", None) == name]
    if len(matched_actors) == 1:
        # The dict carries name/namespace keys, which ray.get_actor accepts as kwargs.
        return ray.get_actor(**matched_actors[0])
    if len(matched_actors) > 1:
        logging.warning("multiple actors with same name found: %s", matched_actors)
    else:
        logging.info("failed to get any actor named %s", name)
    return None


def create_nccl_communicator_in_ray(
    rank: int, world_size: int, group_name: str, max_retries: int = 100, interval_s: int = 5
):
    """Rendezvous helper that builds a NCCL communicator via a named Ray actor.

    Rank 0 generates the NCCL unique id and publishes it through an
    ``NCCLIDStore`` actor named ``group_name``; the other ranks poll for the
    actor, fetch the id, and join the communicator.

    Args:
        rank: This process's rank within the group.
        world_size: Total number of participating ranks.
        group_name: Name under which the rendezvous actor is registered.
        max_retries: How many times non-zero ranks poll for the actor.
        interval_s: Seconds to sleep between polls.

    Returns:
        An initialized ``NcclCommunicator``.

    Raises:
        TimeoutError: If a non-zero rank exhausts ``max_retries`` without
            finding the id store. (Previously the function silently returned
            None here, which surfaced later as an opaque AttributeError.)
    """
    if rank == 0:
        nccl_id = get_unique_id()
        nccl_id_store = NCCLIDStore.options(name=group_name).remote(nccl_id)

        # Ensure the actor is alive and serving the id before peers connect.
        assert ray.get(nccl_id_store.get.remote()) == nccl_id
        return NcclCommunicator(
            ndev=world_size,
            commId=nccl_id,
            rank=0,
        )

    for i in range(max_retries):
        nccl_id_store = get_nccl_id_store_by_name(group_name)
        if nccl_id_store is not None:
            logging.info("nccl_id_store %s got", group_name)
            nccl_id = ray.get(nccl_id_store.get.remote())
            logging.info("nccl id for %s got: %s", group_name, nccl_id)
            return NcclCommunicator(
                ndev=world_size,
                commId=nccl_id,
                rank=rank,
            )
        logging.info("failed to get nccl_id for %d time, sleep for %d seconds", i + 1, interval_s)
        time.sleep(interval_s)

    # Bug fix: previously fell through and implicitly returned None.
    raise TimeoutError(f"rank {rank} failed to rendezvous with {group_name} after {max_retries} retries")
32 | # contents: read 33 | # actions: read 34 | 35 | steps: 36 | - name: "Checkout code" 37 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 38 | with: 39 | persist-credentials: false 40 | 41 | - name: "Run analysis" 42 | uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 43 | with: 44 | results_file: results.sarif 45 | results_format: sarif 46 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 47 | # - you want to enable the Branch-Protection check on a *public* repository, or 48 | # - you are installing Scorecard on a *private* repository 49 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. 50 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 51 | 52 | # Public repositories: 53 | # - Publish results to OpenSSF REST API for easy access by consumers 54 | # - Allows the repository to include the Scorecard badge. 55 | # - See https://github.com/ossf/scorecard-action#publishing-results. 56 | # For private repositories: 57 | # - `publish_results` will always be set to `false`, regardless 58 | # of the value entered here. 59 | publish_results: true 60 | 61 | # Upload the results to GitHub's code scanning dashboard (optional). 62 | # Commenting out will disable upload of results to your repo's Code Scanning dashboard 63 | - name: "Upload to code-scanning" 64 | uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 #v3.28.9 65 | with: 66 | sarif_file: results.sarif 67 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
def compute_transformers_input_shapes(batches, meta_info):
    """Pre-compute the hidden-state shape of every micro-batch for each PP stage.

    Args:
        batches: Iterable of model-input dicts with "input_ids" and "attention_mask".
        meta_info: Dict providing "hidden_size" and the "sequence_parallel" flag.

    Returns:
        A list of torch.Size entries, one (total_nnz, 1, hidden_size) per
        micro-batch; under sequence parallelism total_nnz is divided across
        the tensor-parallel group.
    """
    from flash_attn.bert_padding import unpad_input  # flash 2 is a must for Megatron

    hidden_size = meta_info["hidden_size"]
    sequence_parallel = meta_info["sequence_parallel"]
    input_shapes = []
    for model_inputs in batches:
        # Strip padding so the shape reflects only real (non-pad) tokens.
        tokens_rmpad = unpad_input(
            model_inputs["input_ids"].unsqueeze(dim=-1), model_inputs["attention_mask"]
        )[0]  # (total_nnz, 1)
        if sequence_parallel:
            tokens_rmpad = pad_to_sequence_parallel(tokens_rmpad)
            # Tokens are sharded across the tensor-parallel ranks under SP.
            nnz = tokens_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size()
        else:
            nnz = tokens_rmpad.shape[0]
        input_shapes.append(torch.Size([nnz, 1, hidden_size]))
    return input_shapes


def make_batch_generator(batches, vpp_size):
    """Create micro-batch iterator(s) for Megatron pipeline parallelism.

    With virtual pipeline parallelism (vpp_size > 1) every virtual stage needs
    its own iterator over the same micro-batches; otherwise a single iterator
    suffices.

    Args:
        batches: An iterable (e.g., list) of micro-batches.
        vpp_size (int): The virtual pipeline model parallel size.

    Returns:
        A single iterator, or a list of ``vpp_size`` independent iterators.
    """
    if vpp_size > 1:
        # One independent iterator per virtual pipeline chunk.
        return [iter(batches) for _ in range(vpp_size)]
    return iter(batches)
import re

# Only the tail of the solution is searched; GSM8K final answers appear at the end.
_SOLUTION_CLIP_CHARS = 300


def extract_solution(solution_str, method="strict"):
    """Extract the final numeric answer from a GSM8K solution string.

    Args:
        solution_str: The model-generated solution text.
        method: "strict" requires the canonical "#### <number>" format;
            "flexible" falls back to the last number-like token in the text.

    Returns:
        The extracted answer as a string, or None if no valid answer is found.
    """
    assert method in ["strict", "flexible"]

    # Optimization: Regular expression matching on very long strings can be slow.
    # For math problems, the final answer is usually at the end.
    # We only match on the last 300 characters, which is a safe approximation for 300 tokens.
    if len(solution_str) > _SOLUTION_CLIP_CHARS:
        solution_str = solution_str[-_SOLUTION_CLIP_CHARS:]

    if method == "strict":
        # this also tests the formatting of the model
        solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str)
        if len(solutions) == 0:
            final_answer = None
        else:
            # take the last solution
            final_answer = solutions[-1].replace(",", "").replace("$", "")
    elif method == "flexible":
        answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str)
        final_answer = None
        if len(answer) == 0:
            # no reward if there is no answer
            pass
        else:
            invalid_str = ["", "."]
            # find the last number that is not '.'
            # Bug fix: previously the loop variable leaked, so when every
            # candidate was invalid (e.g. "."), an invalid string was returned
            # instead of None.
            for candidate in reversed(answer):
                if candidate not in invalid_str:
                    final_answer = candidate
                    break
    return final_answer


def compute_score(solution_str, ground_truth, method="strict", format_score=0.0, score=1.0):
    """The scoring function for GSM8k.

    Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual
    Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.

    Args:
        solution_str: the solution text
        ground_truth: the ground truth
        method: the method to extract the solution, choices are 'strict' and 'flexible'
        format_score: the score for the format
        score: the score for the correct answer

    Returns:
        ``score`` for a correct answer, ``format_score`` for an extracted but
        wrong answer, and 0 when no answer could be extracted.
    """
    answer = extract_solution(solution_str=solution_str, method=method)
    if answer is None:
        return 0
    if answer == ground_truth:
        return score
    return format_score
lora_alpha: 16 # LoRA scaling factor 41 | target_modules: all-linear # Target modules for LoRA adaptation 42 | use_liger: False 43 | strategy: fsdp2 44 | optim: 45 | lr: 1e-5 46 | betas: [0.9, 0.95] 47 | weight_decay: 0.01 48 | warmup_steps_ratio: 0.1 49 | clip_grad: 1.0 50 | lr_scheduler: cosine 51 | ulysses_sequence_parallel_size: 1 52 | use_remove_padding: False 53 | trainer: 54 | default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name} 55 | default_hdfs_dir: null 56 | project_name: gsm8k-sft 57 | experiment_name: test 58 | total_epochs: 4 59 | total_training_steps: null 60 | logger: [ 'console', 'wandb' ] 61 | seed: 1 62 | save_freq: -1 63 | test_freq: -1 64 | nnodes: 1 65 | n_gpus_per_node: 8 66 | max_ckpt_to_keep: null # Maximum number of checkpoints to keep, set to null to keep all 67 | 68 | # Resume mode: "auto", "disable", or "resume_path" 69 | # "auto": resume from last checkpoint if available 70 | # "disable": start from scratch 71 | # "resume_path": resume from a user-defined path 72 | resume_mode: auto 73 | 74 | # Path to resume training from (used when resume_mode is "resume_path" or "auto") 75 | resume_from_path: null 76 | 77 | # Checkpoint configuration 78 | checkpoint: 79 | # What to include in saved checkpoints 80 | # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space 81 | save_contents: ["model", "optimizer", "extra"] 82 | 83 | # For more flexibility, you can specify the contents to load from the checkpoint. 84 | load_contents: ${trainer.checkpoint.save_contents} 85 | device: cuda 86 | -------------------------------------------------------------------------------- /verl/verl/utils/device.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # This code is inspired by the torchtune. 
import logging
from types import ModuleType

import torch

logger = logging.getLogger(__name__)


def is_torch_npu_available() -> bool:
    """Check the availability of NPU (requires the optional torch_npu package)."""
    try:
        import torch_npu  # noqa: F401

        return torch.npu.is_available()
    except ImportError:
        return False


# Cached once at import time; device availability does not change within a process.
is_cuda_available = torch.cuda.is_available()
is_npu_available = is_torch_npu_available()


def get_visible_devices_keyword() -> str:
    """Function that gets visible devices keyword name.
    Returns:
        'CUDA_VISIBLE_DEVICES' or `ASCEND_RT_VISIBLE_DEVICES`
    """
    return "CUDA_VISIBLE_DEVICES" if is_cuda_available else "ASCEND_RT_VISIBLE_DEVICES"


def get_device_name() -> str:
    """Function that gets the torch.device based on the current machine.
    This currently only supports CPU, CUDA, NPU.
    Returns:
        device
    """
    if is_cuda_available:
        device = "cuda"
    elif is_npu_available:
        device = "npu"
    else:
        device = "cpu"
    return device


def get_torch_device() -> ModuleType:
    """Return the corresponding torch attribute based on the device type string.

    Bug fix: the return annotation previously used the builtin ``any`` function
    instead of an actual type; the value returned is a torch sub-module.

    Returns:
        module: The corresponding torch device namespace, or torch.cuda if not found.
    """
    device_name = get_device_name()
    try:
        return getattr(torch, device_name)
    except AttributeError:
        logger.warning(f"Device namespace '{device_name}' not found in torch, try to load torch.cuda.")
        return torch.cuda


def get_device_id() -> int:
    """Return current device id based on the device type.
    Returns:
        device index
    """
    return get_torch_device().current_device()


def get_nccl_backend() -> str:
    """Return nccl backend type based on the device type.
    Returns:
        nccl backend type string.
    Raises:
        RuntimeError: if neither CUDA nor NPU is available.
    """
    if is_cuda_available:
        return "nccl"
    elif is_npu_available:
        return "hccl"
    else:
        raise RuntimeError(f"No available nccl backend found on device type {get_device_name()}.")


def set_expandable_segments(enable: bool) -> None:
    """Enable or disable expandable segments for cuda.

    NOTE: relies on a private CUDA allocator API; silently a no-op on
    non-CUDA devices.

    Args:
        enable (bool): Whether to enable expandable segments. Used to avoid OOM.
    """
    if is_cuda_available:
        torch.cuda.memory._set_allocator_settings(f"expandable_segments:{enable}")
from dataclasses import is_dataclass
from typing import Any, Optional

from omegaconf import DictConfig, ListConfig, OmegaConf

__all__ = ["omega_conf_to_dataclass"]


def omega_conf_to_dataclass(config: DictConfig | dict, dataclass_type: Optional[type[Any]] = None) -> Any:
    """
    Convert an OmegaConf DictConfig to a dataclass.

    Args:
        config: The OmegaConf DictConfig or dict to convert.
        dataclass_type: The dataclass type to convert to. When dataclass_type is None,
            the DictConfig must contain _target_ to be instantiated via hydra.instantiate API.

    Returns:
        The dataclass instance.
    """
    # Got an empty config: None when no target type is given, otherwise a
    # default-constructed instance of the requested dataclass.
    if not config:
        return dataclass_type if dataclass_type is None else dataclass_type()
    # Got an object that is not config-like (e.g. already an instantiated
    # dataclass): pass it through unchanged.
    if not isinstance(config, DictConfig | ListConfig | dict | list):
        return config

    if dataclass_type is None:
        assert "_target_" in config, (
            "When dataclass_type is not provided, config must contain _target_. "
            "See trainer/config/ppo_trainer.yaml algorithm section for an example. "
            f"Got config: {config}"
        )
        # Hydra resolves _target_ recursively; _convert_="partial" converts
        # untyped containers to plain dict/list while keeping structured ones.
        from hydra.utils import instantiate

        return instantiate(config, _convert_="partial")

    if not is_dataclass(dataclass_type):
        raise ValueError(f"{dataclass_type} must be a dataclass")
    cfg = OmegaConf.create(config)  # in case it's a dict
    # pop _target_ to avoid hydra instantiate error, as most dataclass do not have _target_
    # Updated (vermouth1992) We add _target_ to BaseConfig so that it is compatible.
    # Otherwise, this code path can't support recursive instantiation.
    # if "_target_" in cfg:
    #     cfg.pop("_target_")
    cfg_from_dataclass = OmegaConf.structured(dataclass_type)
    # let cfg override the existing vals in `cfg_from_dataclass`
    cfg_merged = OmegaConf.merge(cfg_from_dataclass, cfg)
    # now convert to `dataclass_type`
    config_object = OmegaConf.to_object(cfg_merged)
    return config_object


def update_dict_with_config(dictionary: dict, config: DictConfig):
    """Overwrite entries of ``dictionary`` in place with same-named attributes from ``config``.

    Only keys already present in ``dictionary`` are considered; keys absent
    from ``config`` are left untouched. Mutates ``dictionary``; returns None.
    """
    for key in dictionary:
        if hasattr(config, key):
            dictionary[key] = getattr(config, key)
"""
Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT
"""

from torch.distributed.device_mesh import DeviceMesh

from verl import DataProto
from verl.protocol import all_gather_data_proto
from verl.utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group

from .base import BaseShardingManager


class FSDPUlyssesShardingManager(BaseShardingManager):
    """Sharding manager supporting data resharding for FSDP + Ulysses sequence parallelism."""

    def __init__(self, device_mesh: DeviceMesh):
        super().__init__()
        self.device_mesh = device_mesh
        self.seed_offset = 12345

    def __enter__(self):
        if self.device_mesh is None:
            return
        # A global SP group may be active; swap in this model's own SP group,
        # remembering the previous one so __exit__ can restore it.
        self.prev_sp_group = get_ulysses_sequence_parallel_group()
        set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group())
        # TODO: check how to set seed for each model

    def __exit__(self, exc_type, exc_value, traceback):
        if self.device_mesh is None:
            return
        # Restore whichever SP group was active before entering this context.
        set_ulysses_sequence_parallel_group(self.prev_sp_group)
        # TODO: check how to set seed for each model

    def preprocess_data(self, data: DataProto) -> DataProto:
        """All-gather ``data`` across the sequence-parallel group.

        The batch is first sharded along the FSDP dimension (DP_COMPUTE), so
        every rank inside an SP group must hold the same data before Ulysses
        sequence parallelism runs.
        """
        if self.device_mesh is not None:
            sp_group = self.device_mesh["sp"].get_group()
            all_gather_data_proto(data=data, process_group=sp_group)
        return data

    def postprocess_data(self, data: DataProto) -> DataProto:
        """Split ``data`` back to this rank's slice of the FSDP partition."""
        if self.device_mesh is not None:
            sp_mesh = self.device_mesh["sp"]
            data = data.chunk(chunks=sp_mesh.size())[sp_mesh.get_local_rank()]
        return data
17 | 18 | """ 19 | 20 | from collections import defaultdict 21 | 22 | import hydra 23 | import numpy as np 24 | import pandas as pd 25 | import ray 26 | from omegaconf import OmegaConf 27 | from tqdm import tqdm 28 | 29 | from verl.trainer.ppo.reward import get_custom_reward_fn 30 | from verl.utils.fs import copy_to_local 31 | 32 | 33 | @ray.remote 34 | def process_item(reward_fn, data_source, response_lst, reward_data): 35 | ground_truth = reward_data["ground_truth"] 36 | score_lst = [reward_fn(data_source, r, ground_truth) for r in response_lst] 37 | return data_source, np.mean(score_lst) 38 | 39 | 40 | @hydra.main(config_path="config", config_name="evaluation", version_base=None) 41 | def main(config): 42 | local_path = copy_to_local(config.data.path, use_shm=config.data.get("use_shm", False)) 43 | dataset = pd.read_parquet(local_path) 44 | responses = dataset[config.data.response_key] 45 | data_sources = dataset[config.data.data_source_key] 46 | reward_model_data = dataset[config.data.reward_model_key] 47 | 48 | total = len(dataset) 49 | 50 | # Initialize Ray 51 | if not ray.is_initialized(): 52 | ray.init(**OmegaConf.to_container(config.ray_kwargs.get("ray_init", {}))) 53 | 54 | # evaluate test_score based on data source 55 | data_source_reward = defaultdict(list) 56 | compute_score = get_custom_reward_fn(config) 57 | 58 | # Create remote tasks 59 | remote_tasks = [ 60 | process_item.remote(compute_score, data_sources[i], responses[i], reward_model_data[i]) for i in range(total) 61 | ] 62 | 63 | # Process results as they come in 64 | with tqdm(total=total) as pbar: 65 | while len(remote_tasks) > 0: 66 | # Use ray.wait to get completed tasks 67 | done_ids, remote_tasks = ray.wait(remote_tasks) 68 | for result_id in done_ids: 69 | data_source, score = ray.get(result_id) 70 | data_source_reward[data_source].append(score) 71 | pbar.update(1) 72 | 73 | metric_dict = {} 74 | for data_source, rewards in data_source_reward.items(): 75 | 
metric_dict[f"test_score/{data_source}"] = np.mean(rewards) 76 | 77 | print(metric_dict) 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | --------------------------------------------------------------------------------