├── verl ├── verl │ ├── py.typed │ ├── version │ │ └── version │ ├── trainer │ │ ├── runtime_env.yaml │ │ ├── config │ │ │ ├── evaluation.yaml │ │ │ ├── __init__.py │ │ │ ├── ref │ │ │ │ ├── megatron_ref.yaml │ │ │ │ ├── dp_ref.yaml │ │ │ │ └── ref.yaml │ │ │ ├── npu_profile │ │ │ │ └── npu_profile.yaml │ │ │ ├── reward_model │ │ │ │ ├── dp_reward_model.yaml │ │ │ │ └── megatron_reward_model.yaml │ │ │ ├── generation.yaml │ │ │ ├── actor │ │ │ │ └── dp_actor.yaml │ │ │ └── sft_trainer.yaml │ │ ├── __init__.py │ │ ├── ppo │ │ │ └── __init__.py │ │ ├── constants_ppo.py │ │ └── main_eval.py │ ├── workers │ │ ├── rollout │ │ │ ├── sglang_rollout │ │ │ │ ├── test.py │ │ │ │ └── __init__.py │ │ │ ├── naive │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── vllm_rollout │ │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── engine │ │ │ ├── megatron │ │ │ │ └── __init__.py │ │ │ ├── fsdp │ │ │ │ ├── __init__.py │ │ │ │ └── utils.py │ │ │ └── __init__.py │ │ ├── sharding_manager │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── fsdp_ulysses.py │ │ ├── roles │ │ │ ├── __init__.py │ │ │ └── actor.py │ │ ├── reward_model │ │ │ ├── __init__.py │ │ │ ├── megatron │ │ │ │ └── __init__.py │ │ │ └── base.py │ │ ├── actor │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── critic │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── config │ │ │ └── __init__.py │ │ └── reward_manager │ │ │ ├── __init__.py │ │ │ ├── abstract.py │ │ │ └── registry.py │ ├── models │ │ ├── __init__.py │ │ ├── llama │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── checkpoint_utils │ │ │ │ └── __init__.py │ │ │ │ ├── __init__.py │ │ │ │ └── layers │ │ │ │ ├── __init__.py │ │ │ │ └── parallel_rmsnorm.py │ │ ├── qwen2 │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── checkpoint_utils │ │ │ │ └── __init__.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_rmsnorm.py │ │ │ │ └── parallel_linear.py │ │ │ │ └── __init__.py │ │ ├── transformers │ │ │ ├── __init__.py │ │ │ └── 
npu_patch.py │ │ ├── mcore │ │ │ ├── qwen2_5_vl │ │ │ │ └── __init__.py │ │ │ ├── mbridge.py │ │ │ └── __init__.py │ │ ├── README.md │ │ ├── registry.py │ │ └── weight_loader_registry.py │ ├── experimental │ │ ├── __init__.py │ │ ├── dataset │ │ │ ├── __init__.py │ │ │ └── sampler.py │ │ ├── dynamic_dataset │ │ │ └── __init__.py │ │ └── agent_loop │ │ │ ├── __init__.py │ │ │ └── single_turn_agent_loop.py │ ├── model_merger │ │ ├── __init__.py │ │ └── __main__.py │ ├── third_party │ │ ├── __init__.py │ │ ├── sglang │ │ │ └── __init__.py │ │ └── vllm │ │ │ └── __init__.py │ ├── utils │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── memory.py │ │ │ ├── sequence_parallel.py │ │ │ ├── dist_checkpointing.py │ │ │ └── pipeline_parallel.py │ │ ├── checkpoint │ │ │ └── __init__.py │ │ ├── experimental │ │ │ └── __init__.py │ │ ├── rendezvous │ │ │ ├── __init__.py │ │ │ └── ray_backend.py │ │ ├── metric │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── debug │ │ │ ├── __init__.py │ │ │ └── performance.py │ │ ├── reward_score │ │ │ ├── prime_code │ │ │ │ ├── README.md │ │ │ │ └── utils.py │ │ │ ├── math_batch.py │ │ │ ├── geo3k.py │ │ │ ├── math_verify.py │ │ │ └── gsm8k.py │ │ ├── dataset │ │ │ ├── __init__.py │ │ │ └── README.md │ │ ├── __init__.py │ │ ├── logger │ │ │ └── __init__.py │ │ ├── vllm │ │ │ └── __init__.py │ │ ├── logging_utils.py │ │ ├── profiler │ │ │ ├── empty_annotations.py │ │ │ └── __init__.py │ │ ├── transformers_compat.py │ │ ├── kernel │ │ │ └── __init__.py │ │ ├── distributed.py │ │ ├── net_utils.py │ │ ├── torch_dtypes.py │ │ ├── device.py │ │ └── config.py │ ├── tools │ │ ├── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── mcp_clients │ │ │ └── utils.py │ ├── interactions │ │ ├── utils │ │ │ └── __init__.py │ │ └── __init__.py │ ├── single_controller │ │ ├── base │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── ray │ │ │ └── __init__.py │ └── __init__.py ├── Notice.txt ├── .gemini │ └── config.yaml ├── .github │ ├── dependabot.yml │ ├── 
workflows │ │ ├── secrets_scan.yml │ │ ├── type-coverage-check.yml │ │ ├── pre-commit-full.yml │ │ ├── pre-commit.yml │ │ ├── .deprecate │ │ │ └── e2e_prime.yml │ │ ├── README.md │ │ ├── e2e_sppo.yml │ │ ├── check-pr-title.yml │ │ ├── e2e_spin.yml │ │ └── scorecard.yml │ ├── CODEOWNERS │ └── PULL_REQUEST_TEMPLATE.md ├── requirements-npu.txt ├── .readthedocs.yaml ├── requirements_sglang.txt ├── requirements.txt ├── scripts │ ├── __init__.py │ ├── print_cfg.py │ ├── generate_trainer_config.sh │ └── install_vllm_sglang_mcore.sh ├── .pre-commit-config.yaml ├── .gitignore └── examples │ └── sglang_multiturn │ └── tool_config.yaml ├── .gitignore ├── images └── introduction.png ├── inference └── scripts │ ├── sglang_glm_45_air.sh │ ├── sglang_qwen_coder_30B.sh │ ├── sglang_sr_scientist_30B.sh │ ├── sandbox.sh │ ├── sglang_qwen_coder_480B.sh │ ├── sglang_oss_120b.sh │ ├── sglang_oss_20b.sh │ └── inference.sh └── utils └── data_preprocessing.py /verl/verl/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | data/ 3 | 4 | -------------------------------------------------------------------------------- /verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.5.0.dev 2 | -------------------------------------------------------------------------------- /verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. 
and/or its affiliates -------------------------------------------------------------------------------- /images/introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/SR-Scientist/HEAD/images/introduction.png -------------------------------------------------------------------------------- /verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | CUDA_DEVICE_MAX_CONNECTIONS: "1" 6 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/sglang_rollout/test.py: -------------------------------------------------------------------------------- 1 | from sglang.srt.function_call.function_call_parser import FunctionCallParser 2 | items = FunctionCallParser.ToolCallParserEnum.items() 3 | print(items) -------------------------------------------------------------------------------- /verl/.gemini/config.yaml: -------------------------------------------------------------------------------- 1 | have_fun: false 2 | code_review: 3 | disable: false 4 | comment_severity_threshold: HIGH 5 | max_review_comments: -1 6 | pull_request_opened: 7 | help: false 8 | summary: false 9 | code_review: true 10 | ignore_patterns: [] 11 | -------------------------------------------------------------------------------- /inference/scripts/sglang_glm_45_air.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/GLM-4.5-FP8" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser glm45 \ 6 | --reasoning-parser glm45 \ 7 | --mem-fraction-static 0.85 8 | 9 | -------------------------------------------------------------------------------- 
/inference/scripts/sglang_qwen_coder_30B.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser qwen3_coder \ 6 | --mem-fraction-static 0.85 7 | 8 | 9 | -------------------------------------------------------------------------------- /inference/scripts/sglang_sr_scientist_30B.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 8 \ 5 | --tool-call-parser qwen3_coder \ 6 | --mem-fraction-static 0.85 7 | 8 | 9 | -------------------------------------------------------------------------------- /verl/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | ## Enabled the dependabot to check the dependencies of the project 2 | ## Dependabot will open pull requests to update dependencies automatically 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: pip 7 | directory: "/" 8 | schedule: 9 | interval: weekly -------------------------------------------------------------------------------- /inference/scripts/sandbox.sh: -------------------------------------------------------------------------------- 1 | conda activate sandbox-runtime 2 | make run-online PORT=9010 & 3 | make run-online PORT=9020 & 4 | make run-online PORT=9030 & 5 | make run-online PORT=9040 & 6 | make run-online PORT=9050 & 7 | make run-online PORT=9060 & 8 | make run-online PORT=9070 & 9 | make run-online PORT=9080 & -------------------------------------------------------------------------------- /inference/scripts/sglang_qwen_coder_480B.sh: -------------------------------------------------------------------------------- 1 | 
MODEL_PATH="../models/Qwen3-Coder-480B-A35B-Instruct-FP8" 2 | conda activate srscientist 3 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 4 | --tp 4 \ 5 | --dp 2 \ 6 | --tool-call-parser qwen3_coder \ 7 | --mem-fraction-static 0.85 8 | 9 | 10 | -------------------------------------------------------------------------------- /verl/requirements-npu.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | hydra-core 7 | numpy<2.0.0 8 | pandas 9 | peft 10 | pyarrow>=15.0.0 11 | pybind11 12 | pylatexenc 13 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 14 | transformers==4.52.4 15 | ray==2.46.0 16 | wandb 17 | mathruler 18 | torchdata 19 | einops 20 | qwen_vl_utils 21 | torchvision==0.20.1 22 | -------------------------------------------------------------------------------- /verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 
20 | -------------------------------------------------------------------------------- /verl/requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy<2.0.0 9 | pandas 10 | peft 11 | pyarrow>=19.0.0 12 | pybind11 13 | pylatexenc 14 | ray[default]>=2.10 15 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 16 | torchdata 17 | torchvision 18 | transformers 19 | wandb 20 | sglang[all]==0.4.9.post6 21 | huggingface_hub 22 | -------------------------------------------------------------------------------- /inference/scripts/sglang_oss_120b.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/gpt-oss-120b" 2 | conda activate srscientist 3 | # For machines without internet access, set TIKTOKEN_RS_CACHE_DIR=CACHE_FILE by following the instructions in this issue: https://huggingface.co/openai/gpt-oss-120b/discussions/39. 4 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 5 | --tp 8 \ 6 | --tool-call-parser gpt-oss \ 7 | --reasoning-parser gpt-oss \ 8 | --mem-fraction-static 0.85 9 | 10 | -------------------------------------------------------------------------------- /inference/scripts/sglang_oss_20b.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH="../models/gpt-oss-20b" 2 | conda activate srscientist 3 | # For machines without internet access, set TIKTOKEN_RS_CACHE_DIR=CACHE_FILE by following the instructions in this issue: https://huggingface.co/openai/gpt-oss-120b/discussions/39.
4 | python3 -m sglang.launch_server --model-path $MODEL_PATH \ 5 | --tp 8 \ 6 | --tool-call-parser gpt-oss \ 7 | --reasoning-parser gpt-oss \ 8 | --mem-fraction-static 0.85 9 | 10 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_kwargs: 13 | ray_init: 14 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 15 | timeline_json_file: null 16 | -------------------------------------------------------------------------------- /verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy<2.0.0 10 | pandas 11 | peft 12 | pyarrow>=19.0.0 13 | pybind11 14 | pylatexenc 15 | pre-commit 16 | ray[default] 17 | tensordict>=0.8.0,<=0.9.1,!=0.9.0 18 | torchdata 19 | transformers 20 | # vllm==0.8.4 21 | wandb 22 | packaging>=20.0 23 | uvicorn 24 | fastapi 25 | latex2sympy2_extended 26 | math_verify 27 | tensorboard 28 | torch_memory_saver -------------------------------------------------------------------------------- /verl/.github/workflows/secrets_scan.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - v0.* 6 | pull_request: 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 17 | with: 18 | fetch-depth: 0 19 | - name: Secret Scanning 20 | uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14 21 | with: 22 | extra_args: --results=verified,unknown 23 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | -------------------------------------------------------------------------------- /verl/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/model_merger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/experimental/dynamic_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/verl/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/verl/workers/roles/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .critic import CriticWorker 16 | 17 | __all__ = ["CriticWorker"] 18 | -------------------------------------------------------------------------------- /verl/verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import reduce_metrics 16 | 17 | __all__ = ["reduce_metrics"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/fsdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .engine_impl import FSDPEngine 15 | 16 | __all__ = ["FSDPEngine"] 17 | -------------------------------------------------------------------------------- /verl/verl/interactions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # Copyright 2025 ModelBest Inc. and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | 17 | __all__ = ["NaiveRollout"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | 17 | __all__ = ["BasePPORewardModel"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | 17 | __all__ = ["MegatronRewardModel"] 18 | -------------------------------------------------------------------------------- /verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # For new features please develop in verl/utils/profiler/ 17 | from ..profiler import * # noqa 18 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .base import BaseEngine, EngineRegistry 15 | from .fsdp import FSDPEngine 16 | 17 | __all__ = ["BaseEngine", "EngineRegistry", "FSDPEngine"] 18 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- 1 | ## LiveCodeBench 2 | 3 | ### Introduction 4 | [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench) provides holistic and contamination-free evaluation of coding capabilities of LLMs. Particularly, LiveCodeBench continuously collects new problems over time from contests across three competition platforms -- LeetCode, AtCoder, and CodeForces. 5 | 6 | ### How to reproduce 7 | Our evaluation is grounded on the version found in LiveCodeBench. 8 | > **Installation** 9 | ```bash 10 | # Make sure the CUDA version > 12.0. 11 | pip install -r requirements.txt 12 | pip install flash-attn --no-build-isolation 13 | ``` 14 | 15 | ### Acknowleage 16 | Thank you to the [LiveCodeBench](https://livecodebench.github.io/leaderboard.html) team for their contributions to the open-source community. 
-------------------------------------------------------------------------------- /verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .algorithm import * # noqa 16 | from .config import * # noqa 17 | from . 
import config, algorithm 18 | 19 | __all__ = config.__all__ + algorithm.__all__ 20 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .hf_rollout import HFRollout 17 | from .naive import NaiveRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | 19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"] 20 | -------------------------------------------------------------------------------- /verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"] 19 | -------------------------------------------------------------------------------- /verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers. 
4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # APIs kept for backward compatibility purpose 16 | # This file is deprecated, for new features please develop in profiler/performance.py 17 | from verl.utils.profiler.performance import simple_timer, reduce_timing # noqa 18 | -------------------------------------------------------------------------------- /verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config, tokenizer 16 | from .config import omega_conf_to_dataclass 17 | from .tokenizer import hf_processor, hf_tokenizer 18 | 19 | __all__ = tokenizer.__all__ + config.__all__ + ["hf_processor", "hf_tokenizer", "omega_conf_to_dataclass"] 20 | -------------------------------------------------------------------------------- /verl/verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .agent_loop import AgentLoopBase, AgentLoopManager 16 | from .single_turn_agent_loop import SingleTurnAgentLoop 17 | from .tool_agent_loop import ToolAgentLoop 18 | 19 | _ = [SingleTurnAgentLoop, ToolAgentLoop] 20 | 21 | __all__ = ["AgentLoopBase", "AgentLoopManager"] 22 | -------------------------------------------------------------------------------- /verl/.github/workflows/type-coverage-check.yml: -------------------------------------------------------------------------------- 1 | name: Type Annotation and Docstring Coverage 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - '**/*.py' 7 | - '.github/workflows/type-coverage-check.yml' 8 | 9 | jobs: 10 | type-coverage-check: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 # 🚨 Important: fetch full history so `origin/main` is available 16 | - name: Set up Python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: '3.10' 20 | 21 | - name: Install dependencies 22 | run: | 23 | pip install gitpython 24 | pip install -e .[sglang] 25 | - name: Run type annotation coverage check 26 | run: | 27 | python3 tests/special_sanity/type_coverage_check.py 28 | - name: Run docstring coverage check 29 | run: | 30 | python3 tests/special_sanity/check_api_docs.py verl 31 | -------------------------------------------------------------------------------- /verl/verl/workers/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .critic import * # noqa 16 | from .actor import * # noqa 17 | from .engine import * # noqa 18 | from .optimizer import * # noqa 19 | from .rollout import * # noqa 20 | from . import actor, critic, engine, optimizer, rollout 21 | 22 | __all__ = actor.__all__ + critic.__all__ + engine.__all__ + optimizer.__all__ + rollout.__all__ 23 | -------------------------------------------------------------------------------- /verl/.github/workflows/pre-commit-full.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit-full 2 | 3 | # Run weekly on Sunday at 00:00 UTC 4 | on: 5 | schedule: 6 | - cron: "0 0 * * 0" 7 | # Allow manual triggering 8 | workflow_dispatch: 9 | 10 | # Declare permissions just read content. 
11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | pre-commit-full: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | python-version: ["3.12"] 20 | steps: 21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Set ruff --output-format=github 27 | run: | 28 | sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml 29 | git add .pre-commit-config.yaml 30 | - uses: pre-commit/action@v3.0.1 31 | -------------------------------------------------------------------------------- /verl/verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # Copyright (c) 2024 Alibaba PAI Team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | from .model import Qwen2_5VLModel 19 | from .vision_config import get_vision_model_config, get_vision_projection_config 20 | 21 | __all__ = ["Qwen2_5VLModel", "get_vision_model_config", "get_vision_projection_config"] 22 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from abc import ABC, abstractmethod 16 | 17 | from verl import DataProto 18 | 19 | __all__ = ["BaseRollout"] 20 | 21 | 22 | class BaseRollout(ABC): 23 | """Base class for rollout.""" 24 | 25 | @abstractmethod 26 | def generate_sequences(self, prompts: DataProto) -> DataProto: 27 | """Generate sequences""" 28 | pass 29 | -------------------------------------------------------------------------------- /verl/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | /docs @eric-haibin-lin @zhaochenyang20 @hongpeng-guo 2 | /docs/amd_tutorial @yushengsu-thu 3 | /docs/slang_multiturn @zhaochenyang20 @SwordFaith 4 | 5 | /recipe/dapo @tongyx361 @PeterSH6 6 | /recipe/spin @zhaochenyang20 7 | /recipe/sppo @zhaochenyang20 8 | 9 | /third_party/sglang @zhaochenyang20 @SwordFaith 10 | /third_party/vllm @PeterSH6 @wuxibin89 11 | /verl/single_controller @zw0610 @wuxibin89 @hongpeng-guo 12 | /verl/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6 13 | /verl/workers/engine @eric-haibin-lin @vermouth1992 @ZihengJiang 14 | /verl/workers/roles @eric-haibin-lin @vermouth1992 @ZihengJiang 15 | /verl/workers/engine/fsdp @eric-haibin-lin @vermouth1992 @ZihengJiang 16 | /verl/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq 17 | /verl/workers/rollout/sglang_rollout @zhaochenyang20 @SwordFaith @chenhaiq 18 | 19 | /tests/single_controller @zw0610 @wuxibin89 20 | /tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6 21 | /tests/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq 22 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- 1 | # megatron ref config, inheriting from trainer/config/ref/ref.yaml 2 | defaults: 3 | - ref 4 | # load the reference default config, then apply the fields in the current yaml 5 | - _self_ 6 | 7 | strategy: 
megatron 8 | 9 | megatron: 10 | _target_: verl.workers.config.MegatronEngineConfig 11 | param_offload: False 12 | tensor_model_parallel_size: 1 13 | expert_model_parallel_size: 1 14 | expert_tensor_parallel_size: None 15 | pipeline_model_parallel_size: 1 16 | virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests 17 | context_parallel_size: 1 18 | sequence_parallel: True 19 | use_distributed_optimizer: False 20 | use_dist_checkpointing: False 21 | dist_checkpointing_path: null 22 | seed: ${oc.select:actor_rollout_ref.actor.megatron.seed,42} 23 | override_transformer_config: ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}} 24 | use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False} 25 | 26 | load_weight: True -------------------------------------------------------------------------------- /verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from . 
import base 17 | from .base import * 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | # Note(haibin.lin): single_controller.__version__ is deprecated 22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f: 23 | __version__ = f.read().strip() 24 | 25 | 26 | __all__ = base.__all__ 27 | -------------------------------------------------------------------------------- /verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import ( 16 | RayClassWithInitArgs, 17 | RayResourcePool, 18 | RayWorkerGroup, 19 | create_colocated_worker_cls, 20 | create_colocated_worker_cls_fused, 21 | ) 22 | 23 | __all__ = [ 24 | "RayClassWithInitArgs", 25 | "RayResourcePool", 26 | "RayWorkerGroup", 27 | "create_colocated_worker_cls", 28 | "create_colocated_worker_cls_fused", 29 | ] 30 | -------------------------------------------------------------------------------- /verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .aggregate_logger import ( 17 | DecoratorLoggerBase, 18 | LocalLogger, 19 | log_with_rank, 20 | print_rank_0, 21 | print_with_rank, 22 | print_with_rank_and_timer, 23 | ) 24 | 25 | __all__ = [ 26 | "LocalLogger", 27 | "DecoratorLoggerBase", 28 | "print_rank_0", 29 | "print_with_rank", 30 | "print_with_rank_and_timer", 31 | "log_with_rank", 32 | ] 33 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/npu_profile/npu_profile.yaml: -------------------------------------------------------------------------------- 1 | # Options for the npu profiler 2 | options: 3 | 4 | # Storage path of collected data. 5 | save_path: ./profiler_data 6 | 7 | # The roles that will be profiled. Only takes effect in discrete mode. 8 | # optional values: all, rollout_generate, actor_compute_log_prob, actor_update and ref_compute_log_prob. 9 | # "all" means all roles will be profiled. 10 | roles: ["all"] 11 | 12 | # Collection level, optional values: level_none, level0, level1, level2. 13 | level: level1 14 | 15 | # Whether to enable memory analysis. 16 | with_memory: False 17 | 18 | # Whether to record tensor shape. 19 | record_shapes: False 20 | 21 | # Whether to record Device-side performance data. 22 | with_npu: True 23 | 24 | # Whether to record Host-side performance data. 
25 | with_cpu: True 26 | 27 | # Whether to record Python call stack information. 28 | with_module: False 29 | 30 | # Whether to record operator call stack information. 31 | with_stack: False 32 | 33 | # Whether to automatically parse the data. 34 | analysis: True -------------------------------------------------------------------------------- /verl/verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | try: 17 | from mbridge import AutoBridge 18 | from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model 19 | except ImportError: 20 | print("mbridge package not found. Please install mbridge with `pip install verl[mcore]` or `pip install mbridge`") 21 | raise 22 | 23 | __all__ = ["AutoBridge", "make_value_model", "freeze_moe_router"] 24 | -------------------------------------------------------------------------------- /verl/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .registry import ( 17 | get_mcore_forward_fn, 18 | get_mcore_forward_fused_fn, 19 | get_mcore_weight_converter, 20 | hf_to_mcore_config, 21 | init_mcore_model, 22 | ) 23 | 24 | __all__ = [ 25 | "hf_to_mcore_config", 26 | "init_mcore_model", 27 | "get_mcore_forward_fn", 28 | "get_mcore_weight_converter", 29 | "get_mcore_forward_fused_fn", 30 | ] 31 | -------------------------------------------------------------------------------- /verl/verl/utils/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .utils import TensorLoRARequest, VLLMHijack, is_version_ge 16 | 17 | # The contents of vllm/patch.py should not be imported here, because the contents of 18 | # patch.py should be imported after the vllm LLM instance is created. Therefore, 19 | # wait until you actually start using it before importing the contents of 20 | # patch.py separately. 21 | 22 | __all__ = [ 23 | "TensorLoRARequest", 24 | "VLLMHijack", 25 | "is_version_ge", 26 | ] 27 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | 20 | __all__ = [ 21 | "ParallelQwen2Attention", 22 | "ParallelQwen2DecoderLayer", 23 | "ParallelQwen2DecoderLayerRmPad", 24 | "ParallelQwen2MLP", 25 | "ParallelQwen2RMSNorm", 26 | ] 27 | -------------------------------------------------------------------------------- /verl/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: "v0.12.2" 4 | hooks: 5 | - id: ruff 6 | args: ["--fix", "--show-fixes", "--output-format=full"] 7 | exclude: ^.*\.(ipynb)$ 8 | - id: ruff-format 9 | 10 | - repo: https://github.com/pre-commit/mirrors-mypy 11 | rev: 'v1.17.0' 12 | hooks: 13 | - id: mypy 14 | 15 | - repo: local 16 | hooks: 17 | - id: autogen-trainer-cfg 18 | name: Generate and verify verl/trainer/config/_generated_*.yaml 19 | entry: scripts/generate_trainer_config.sh 20 | language: script 21 | pass_filenames: false 22 | 23 | - repo: local 24 | hooks: 25 | - id: check-docstrings 26 | name: Check doc string coverage 27 | entry: python3 tests/special_sanity/check_docstrings.py 28 | language: python 29 | pass_filenames: false 30 | 31 | - repo: local 32 | hooks: 33 | - id: check-license 34 | name: Check license 35 | entry: python3 tests/special_sanity/check_license.py --directory . 
from .math import compute_score


def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
    """Score a batch of solutions by applying ``compute_score`` element-wise.

    Demonstrates the expected signature of a batched reward function; a real
    implementation would parallelize the per-sample scoring for speed.

    Args:
        data_sources: Per-sample data-source tags (unused by this demo).
        solution_strs: Model outputs to score.
        ground_truths: Reference answers, aligned with ``solution_strs``.
        extra_infos: Per-sample extra metadata (unused by this demo).

    Returns:
        list: One score per (solution, ground truth) pair.
    """
    paired = zip(solution_strs, ground_truths, strict=True)
    return [compute_score(answer, reference) for answer, reference in paired]
import logging
import os

import torch

# Global format applied by set_basic_config at verl import time.
_LOG_FORMAT = "%(levelname)s:%(asctime)s:%(message)s"


def set_basic_config(level):
    """Install the global logging format and level.

    Called once when ``verl`` is imported.

    Args:
        level: Logging level (e.g. ``logging.INFO``) forwarded to
            ``logging.basicConfig``.
    """
    logging.basicConfig(format=_LOG_FORMAT, level=level)


def log_to_file(string):
    """Echo *string* to stdout and, when a ``logs/`` directory exists,
    append it to a per-rank log file.

    Args:
        string: Message to record.
    """
    print(string)
    if not os.path.isdir("logs"):
        return
    # One file per distributed rank to avoid interleaved writes.
    rank = torch.distributed.get_rank()
    with open(f"logs/log_{rank}", "a+") as f:
        f.write(string + "\n")
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __init__(self): 23 | self.timing = {} 24 | 25 | def __enter__(self): 26 | pass 27 | 28 | def __exit__(self, exc_type, exc_value, traceback): 29 | pass 30 | 31 | def preprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 | 34 | def postprocess_data(self, data: DataProto) -> DataProto: 35 | return data 36 | -------------------------------------------------------------------------------- /verl/.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # c.f. https://github.com/pre-commit/action?tab=readme-ov-file#using-this-action 2 | name: pre-commit 3 | 4 | # No need to avoid / cancel lightweight pre-commit jobs 5 | on: 6 | pull_request: 7 | push: 8 | branches: 9 | - main 10 | - v0.* 11 | 12 | # Declare permissions just read content. 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | pre-commit: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ["3.12"] 22 | steps: 23 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install the current repository 29 | run: | 30 | pip install -e . 31 | - name: Set ruff --output-format=github 32 | run: | 33 | sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml 34 | git add .pre-commit-config.yaml 35 | # Check "--all-files" by default 36 | - uses: pre-commit/action@v3.0.1 37 | -------------------------------------------------------------------------------- /verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
"""
Base class for a critic
"""

from abc import ABC, abstractmethod

import torch

from verl import DataProto

__all__ = ["BasePPOCritic"]


class BasePPOCritic(ABC):
    """Abstract interface for a PPO critic (value-function estimator)."""

    def __init__(self, config):
        super().__init__()
        # Critic configuration, kept for use by subclasses.
        self.config = config

    @abstractmethod
    def compute_values(self, data: DataProto) -> torch.Tensor:
        """Compute value estimates for a batch of trajectories."""
        ...

    @abstractmethod
    def update_critic(self, data: DataProto):
        """Run one critic update step on a batch of trajectories."""
        ...
from typing import Callable, Optional


def mark_start_range(
    message: Optional[str] = None,
    color: Optional[str] = None,
    domain: Optional[str] = None,
    category: Optional[str] = None,
) -> None:
    """No-op stand-in for the profiler ``mark_start_range`` hook."""


def mark_end_range(range_id: str) -> None:
    """No-op stand-in for the profiler ``mark_end_range`` hook."""


def mark_annotate(
    message: Optional[str] = None,
    color: Optional[str] = None,
    domain: Optional[str] = None,
    category: Optional[str] = None,
) -> Callable:
    """Return a decorator that leaves the wrapped function untouched.

    Used when profiling is disabled so annotated code runs unchanged.
    """

    def passthrough(func):
        # Identity decorator: hand the function back as-is.
        return func

    return passthrough
14 | 15 | from .registry import get_reward_manager_cls, register # noqa: I001 16 | from .batch import BatchRewardManager 17 | from .dapo import DAPORewardManager 18 | from .naive import NaiveRewardManager 19 | from .prime import PrimeRewardManager 20 | from .sr_scientist import SRScientistRewardManager 21 | 22 | 23 | # Note(haibin.lin): no need to include all reward managers here in case of complicated dependencies 24 | __all__ = [ 25 | "BatchRewardManager", 26 | "DAPORewardManager", 27 | "NaiveRewardManager", 28 | "PrimeRewardManager", 29 | "register", 30 | "get_reward_manager_cls", 31 | "SRScientistRewardManager", 32 | ] 33 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_llama_megatron import ( 16 | ParallelLlamaForCausalLM, 17 | # rmpad with megatron 18 | ParallelLlamaForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelLlamaForCausalLMRmPadPP, 21 | ParallelLlamaForValueRmPad, 22 | ParallelLlamaForValueRmPadPP, 23 | # original model with megatron 24 | ParallelLlamaModel, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelLlamaForCausalLM", 29 | "ParallelLlamaForCausalLMRmPad", 30 | "ParallelLlamaForCausalLMRmPadPP", 31 | "ParallelLlamaForValueRmPad", 32 | "ParallelLlamaForValueRmPadPP", 33 | "ParallelLlamaModel", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_qwen2_megatron import ( 16 | ParallelQwen2ForCausalLM, 17 | # rmpad with megatron 18 | ParallelQwen2ForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelQwen2ForCausalLMRmPadPP, 21 | ParallelQwen2ForValueRmPad, 22 | ParallelQwen2ForValueRmPadPP, 23 | # original model with megatron 24 | ParallelQwen2Model, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelQwen2ForCausalLM", 29 | "ParallelQwen2ForCausalLMRmPad", 30 | "ParallelQwen2ForCausalLMRmPadPP", 31 | "ParallelQwen2ForValueRmPad", 32 | "ParallelQwen2ForValueRmPadPP", 33 | "ParallelQwen2Model", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_linear import ( 18 | LinearForLastLayer, 19 | MergedColumnParallelLinear, 20 | QKVParallelLinear, 21 | ) 22 | from .parallel_mlp import ParallelLlamaMLP 23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 24 | 25 | __all__ = [ 26 | "LinearForLastLayer", 27 | "MergedColumnParallelLinear", 28 | "QKVParallelLinear", 29 | "ParallelLlamaAttention", 30 | "ParallelLlamaDecoderLayer", 31 | "ParallelLlamaDecoderLayerRmPad", 32 | "ParallelLlamaMLP", 33 | "ParallelLlamaRMSNorm", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/verl/experimental/dataset/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Amazon.com Inc and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from abc import abstractmethod
from collections.abc import Sized

from omegaconf import DictConfig
from torch.utils.data import Sampler

from verl import DataProto


class AbstractSampler(Sampler[int]):
    """Abstract interface for custom samplers.

    Concrete samplers are constructed from the dataset and the data
    section of the trainer configuration.
    """

    @abstractmethod
    def __init__(
        self,
        data_source: Sized,
        data_config: DictConfig,
    ):
        ...


class AbstractCurriculumSampler(AbstractSampler):
    """Experimental interface for curriculum learning samplers.

    Implementations adjust their sampling strategy after observing each
    training batch.
    """

    @abstractmethod
    def update(self, batch: DataProto) -> None:
        ...
18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | -------------------------------------------------------------------------------- /verl/scripts/print_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | try: 15 | import hydra 16 | except ImportError as e: 17 | raise ImportError("Please install hydra-core via 'pip install hydra-core' and retry.") from e 18 | 19 | 20 | @hydra.main(config_path="../verl/trainer/config", config_name="ppo_trainer", version_base=None) 21 | def main(config): 22 | """Main entry point for PPO training with Hydra configuration management. 23 | 24 | Args: 25 | config_dict: Hydra configuration dictionary containing training parameters. 
26 | """ 27 | print(config) 28 | from verl.utils.config import omega_conf_to_dataclass 29 | 30 | profiler_config = omega_conf_to_dataclass(config.critic.profiler) 31 | print(profiler_config) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /verl/verl/utils/transformers_compat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Compatibility utilities for different versions of transformers library. 17 | """ 18 | 19 | # Handle version compatibility for flash_attn_supports_top_left_mask 20 | # This function was added in newer versions of transformers 21 | try: 22 | from transformers.modeling_flash_attention_utils import flash_attn_supports_top_left_mask 23 | except ImportError: 24 | # For older versions of transformers that don't have this function 25 | # Default to False as a safe fallback for older versions 26 | def flash_attn_supports_top_left_mask(): 27 | """Fallback implementation for older transformers versions. 28 | Returns False to disable features that require this function. 
29 | """ 30 | return False 31 | -------------------------------------------------------------------------------- /verl/verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 19 | # 20 | # Licensed under the Apache License, Version 2.0 (the "License"); 21 | # you may not use this file except in compliance with the License. 22 | # You may obtain a copy of the License at 23 | # 24 | # http://www.apache.org/licenses/LICENSE-2.0 25 | # 26 | # Unless required by applicable law or agreed to in writing, software 27 | # distributed under the License is distributed on an "AS IS" BASIS, 28 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 | # See the License for the specific language governing permissions and 30 | # limitations under the License. 31 | 32 | -------------------------------------------------------------------------------- /verl/verl/workers/reward_manager/abstract.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2025 SGLang Team 2 | # Copyright Amazon.com, Inc. or its affiliates. 3 | # Copyright 2025 ModelBest Inc. 
and/or its affiliates 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from abc import ABC, abstractmethod 18 | from typing import Any, Callable 19 | 20 | import torch 21 | 22 | from verl.protocol import DataProto 23 | 24 | RawRewardFn = Callable[..., Any] 25 | 26 | 27 | class AbstractRewardManager(ABC): 28 | @abstractmethod 29 | def __init__( 30 | self, 31 | tokenizer: Any, 32 | num_examine: int, 33 | compute_score: RawRewardFn | None, 34 | reward_fn_key: str = "data_source", 35 | **kwargs: Any, 36 | ): 37 | pass 38 | 39 | @abstractmethod 40 | def __call__( 41 | self, 42 | data: DataProto, 43 | return_dict: bool = False, 44 | ) -> torch.Tensor | dict[str, Any]: 45 | pass 46 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r".*.*\\boxed\{.*\}.*", re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str, use_boxed: bool = True) -> float: 26 | if use_boxed: 27 | answer = extract_boxed_content(predict_str) 28 | else: 29 | answer = predict_str 30 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 31 | 32 | 33 | def compute_score(predict_str: str, ground_truth: str, use_boxed: bool = True, format_score: float = 0.1) -> float: 34 | return (1.0 - format_score) * acc_reward(predict_str, ground_truth, use_boxed) + format_score * format_reward( 35 | predict_str 36 | ) 37 | -------------------------------------------------------------------------------- /verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Utilities for distributed training.""" 15 | 16 | import os 17 | 18 | import torch.distributed 19 | 20 | from verl.utils.device import get_nccl_backend, get_torch_device 21 | 22 | 23 | def initialize_global_process_group(timeout_second=36000): 24 | from datetime import timedelta 25 | 26 | torch.distributed.init_process_group( 27 | get_nccl_backend(), 28 | timeout=timedelta(seconds=timeout_second), 29 | init_method=os.environ.get("DIST_INIT_METHOD", None), 30 | ) 31 | local_rank = int(os.environ["LOCAL_RANK"]) 32 | rank = int(os.environ["RANK"]) 33 | world_size = int(os.environ["WORLD_SIZE"]) 34 | 35 | if torch.distributed.is_initialized(): 36 | get_torch_device().set_device(local_rank) 37 | return local_rank, rank, world_size 38 | 39 | 40 | def destroy_global_process_group(): 41 | if torch.distributed.is_initialized(): 42 | torch.distributed.destroy_process_group() 43 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | 17 | from verl.utils.device import get_device_id 18 | 19 | 20 | class MemoryBuffer: 21 | def __init__(self, numel, numel_padded, dtype): 22 | self.numel = numel 23 | self.numel_padded = numel_padded 24 | self.dtype = dtype 25 | self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=get_device_id(), requires_grad=False) 26 | 27 | def zero(self): 28 | """Reset the buffer to zero.""" 29 | self.data.zero_() 30 | 31 | def get(self, shape, start_index): 32 | """Return a tensor with the input `shape` as a view into the 33 | 1-D data starting at `start_index`.""" 34 | end_index = start_index + shape.numel() 35 | assert end_index <= self.numel, "requested tensor is out of the buffer range." 36 | buffer_tensor = self.data[start_index:end_index] 37 | buffer_tensor = buffer_tensor.view(shape) 38 | return buffer_tensor 39 | -------------------------------------------------------------------------------- /verl/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from .vllm_rollout_spmd import vLLMAsyncRollout, vLLMRollout # noqa: F401 18 | 19 | 20 | def get_version(pkg): 21 | try: 22 | return version(pkg) 23 | except PackageNotFoundError: 24 | return None 25 | 26 | 27 | vllm_package_name = "vllm" 28 | vllm_package_version = get_version(vllm_package_name) 29 | if vllm_package_version is None: 30 | raise PackageNotFoundError( 31 | "To use vllm rollout, please ensure the 'vllm' package is properly installed. See " 32 | "https://verl.readthedocs.io/en/latest/start/install.html for more details" 33 | ) 34 | 35 | if "ROCM_PATH" in os.environ: 36 | import re 37 | 38 | match = re.match(r"(\d+\.\d+\.?\d*)", vllm_package_version) 39 | if match: 40 | vllm_package_version = match.group(1) 41 | else: 42 | raise ValueError(f"Warning: Could not parse version format: {vllm_package_version}") 43 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | try: 16 | from math_verify.errors import TimeoutException 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> bool: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | try: 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | except Exception: 35 | pass 36 | except TimeoutException: 37 | ret_score = timeout_score 38 | 39 | return ret_score 40 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- 1 | # defaults specify the default config from each component 2 | defaults: 3 | 4 | # dp ref config, inheriting from trainer/config/ref/ref.yaml 5 | - ref 6 | 7 | # load the reference default config, then apply the fields in the current yaml 8 | - _self_ 9 | 10 | # ref model is assumed to be identical to actor model. Specify model.path for using a different ref model. 
11 | # Potential use case involves on policy distillation where we calculate KL divergence between student actor 12 | # and teacher ref 13 | model: null 14 | 15 | # config for FSDP strategy 16 | fsdp_config: 17 | 18 | # Target class for this configuration 19 | _target_: verl.workers.config.FSDPEngineConfig 20 | 21 | # the wrap policy for FSDP model 22 | wrap_policy: 23 | 24 | # minimum number of params in a wrapped module 25 | min_num_params: 0 26 | 27 | # whether to offload parameters in FSDP 28 | param_offload: False 29 | 30 | # whether to perform reshard after model forward to save memory. 31 | # only for fsdp2, [True, False, int between 1 and fsdp_size] 32 | reshard_after_forward: True 33 | 34 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 35 | # before the current forward computation. 36 | forward_prefetch: False 37 | 38 | # sequence parallel size 39 | # same as actor_rollout_ref.actor.ulysses_sequence_parallel_size if it exists, otherwise 1 40 | ulysses_sequence_parallel_size: ${oc.select:actor_rollout_ref.actor.ulysses_sequence_parallel_size,1} 41 | 42 | # calculate entropy with chunking to reduce memory peak 43 | entropy_from_logits_with_chunking: False 44 | 45 | # recompute entropy 46 | entropy_checkpointing: False 47 | -------------------------------------------------------------------------------- /verl/verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..device import is_npu_available 16 | from ..import_utils import is_nvtx_available 17 | from .performance import GPUMemoryLogger, log_gpu_memory_usage, simple_timer 18 | from .profile import DistProfilerExtension, ProfilerConfig 19 | 20 | if is_nvtx_available(): 21 | from .nvtx_profile import NsightSystemsProfiler as DistProfiler 22 | from .nvtx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer 23 | elif is_npu_available: 24 | from .mstx_profile import NPUProfiler as DistProfiler 25 | from .mstx_profile import mark_annotate, mark_end_range, mark_start_range, marked_timer 26 | else: 27 | from .performance import marked_timer 28 | from .profile import DistProfiler, mark_annotate, mark_end_range, mark_start_range 29 | 30 | __all__ = [ 31 | "GPUMemoryLogger", 32 | "log_gpu_memory_usage", 33 | "mark_start_range", 34 | "mark_end_range", 35 | "mark_annotate", 36 | "DistProfiler", 37 | "DistProfilerExtension", 38 | "ProfilerConfig", 39 | "simple_timer", 40 | "marked_timer", 41 | ] 42 | -------------------------------------------------------------------------------- /verl/verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import os 17 | 18 | from ray._private.runtime_env.constants import RAY_JOB_CONFIG_JSON_ENV_VAR 19 | 20 | PPO_RAY_RUNTIME_ENV = { 21 | "env_vars": { 22 | "TOKENIZERS_PARALLELISM": "true", 23 | "NCCL_DEBUG": "WARN", 24 | "VLLM_LOGGING_LEVEL": "WARN", 25 | "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true", 26 | "CUDA_DEVICE_MAX_CONNECTIONS": "1", 27 | }, 28 | } 29 | 30 | 31 | def get_ppo_ray_runtime_env(): 32 | """ 33 | A filter function to return the PPO Ray runtime environment. 34 | To avoid repeat of some environment variables that are already set. 35 | """ 36 | working_dir = ( 37 | json.loads(os.environ.get(RAY_JOB_CONFIG_JSON_ENV_VAR, "{}")).get("runtime_env", {}).get("working_dir", None) 38 | ) 39 | 40 | runtime_env = { 41 | "env_vars": PPO_RAY_RUNTIME_ENV["env_vars"].copy(), 42 | **({"working_dir": None} if working_dir is None else {}), 43 | } 44 | for key in list(runtime_env["env_vars"].keys()): 45 | if os.environ.get(key) is not None: 46 | runtime_env["env_vars"].pop(key, None) 47 | return runtime_env 48 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/reward_model/dp_reward_model.yaml: -------------------------------------------------------------------------------- 1 | # Format checks enforced on CI: 2 | # 1. Comments must appear above each field. 3 | # 2. There must be a blank line between each field. 4 | # 3. Inline comments (after a field on the same line) are not allowed. 5 | # 4. Indentation level is respected for nested fields. 
6 | 7 | # defaults specify the default config from each component 8 | defaults: 9 | 10 | # dp actor config, inheriting from trainer/config/reward_model/reward_model.yaml 11 | - reward_model 12 | 13 | # load the reference default config, then apply the fields in the current yaml 14 | - _self_ 15 | 16 | strategy: fsdp 17 | 18 | model: 19 | 20 | # Whether to use shared memory for loading the model 21 | use_shm: False 22 | 23 | # Use remove padding optimization (saves compute) 24 | use_remove_padding: False 25 | 26 | # Whether to use fused reward kernels for speedup 27 | use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels} 28 | 29 | # FSDP-specific config 30 | fsdp_config: 31 | 32 | # Target configuration dataclass 33 | _target_: verl.workers.config.FSDPEngineConfig 34 | 35 | # Policy for wrapping layers with FSDP 36 | wrap_policy: 37 | 38 | # Minimum number of parameters to trigger wrapping 39 | min_num_params: 0 40 | 41 | # Whether to offload model parameters to CPU 42 | param_offload: False 43 | 44 | # Only for FSDP2: Reshard after forward pass to reduce memory footprint 45 | reshard_after_forward: True 46 | 47 | # Number of GPUs in each FSDP shard group; -1 means auto 48 | fsdp_size: -1 49 | 50 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 51 | # before the current forward computation. 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Flatten the LSR-Synth benchmark into one parquet file.")
    parser.add_argument(
        "--local_dir",
        default="./data/inference",
        help="Directory holding the HF dataset and lsr_bench_data.hdf5",
    )
    # Previously hard-coded to ./data/inference regardless of --local_dir;
    # exposed as an argument with the same default for backward compatibility.
    parser.add_argument(
        "--output_dir",
        default="./data/inference",
        help="Directory to write llmsrbench.parquet into",
    )

    args = parser.parse_args()
    local_dir = args.local_dir
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    benchmark_dataset = []
    sample_h5file_path = Path(local_dir) / "lsr_bench_data.hdf5"

    # Load the dataset dict once (was reloaded on every loop iteration) and
    # keep the HDF5 file open across all four domains.
    dataset_splits = datasets.load_dataset(local_dir)

    # lsr_synth: four synthetic domains, each stored as its own split.
    with h5py.File(sample_h5file_path, "r") as sample_file:
        for dataset_identifier in ["matsci", "chem_react", "bio_pop_growth", "phys_osc"]:
            ds = dataset_splits[f"lsr_synth_{dataset_identifier}"]
            for e in ds:
                # Per-equation sample arrays; keys are train / test / ood_test.
                group = sample_file[f"/lsr_synth/{dataset_identifier}/{e['name']}"]
                samples = {k: v[...].astype(np.float64) for k, v in group.items()}
                benchmark_dataset.append(
                    {
                        "dataset_identifier": f"lsr_synth/{dataset_identifier}",
                        "equation_idx": e["name"],
                        "symbols": e["symbols"],
                        "symbol_descs": e["symbol_descs"],
                        "symbol_properties": e["symbol_properties"],
                        "expression": e["expression"],
                        "samples": samples,  # dict: ['train', 'test', 'ood_test']
                    }
                )

    print(len(benchmark_dataset))
    benchmark_dataset = datasets.Dataset.from_list(benchmark_dataset)
    benchmark_dataset.to_parquet(os.path.join(output_dir, "llmsrbench.parquet"))
class BasePPORewardModel(ABC):
    """Abstract base for PPO reward models.

    Subclasses score full sequences and emit a per-token reward tensor in
    which only the [EOS] position carries the sequence's reward.
    """

    def __init__(self, config):
        # config: implementation-specific reward-model configuration object.
        self.config = config

    @abstractmethod
    def compute_reward(self, data: DataProto) -> DataProto:
        """Computing reward given input_ids. The transformers should output a tensor with shape
        [batch_size, sequence_length], and the value at [EOS] mask should be gathered.

        Args:
            data: must contain keys "input_ids", "attention_mask" and "position_ids".
                - input_ids: [batch_size, sequence_length]
                - attention_mask: [batch_size, sequence_length]
                - position_ids: [batch_size, sequence_length]

        Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
            Other position should have zero reward. Note that this may change in the future if we use
            dense reward. So, we leave the interface for general case.
            - reward: [batch_size, sequence_length].

        """
        pass
is for backward-compatibility 47 | ulysses_sequence_parallel_size: 1 # sp size 48 | entropy_from_logits_with_chunking: False # calculate entropy with chunking to reduce memory peak 49 | entropy_checkpointing: False # recompute entropy 50 | fsdp_config: 51 | fsdp_size: -1 52 | forward_prefetch: False # FSDP1 forward_prefetch configuration 53 | 54 | ray_kwargs: 55 | ray_init: 56 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 57 | timeline_json_file: null 58 | -------------------------------------------------------------------------------- /verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import LlamaConfig 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelLlamaRMSNorm(nn.Module): 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | 17 | import torch 18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 19 | from megatron.core import ModelParallelConfig 20 | from torch import nn 21 | from transformers import Qwen2Config 22 | 23 | from verl.utils.megatron import sequence_parallel as sp_utils 24 | 25 | 26 | class ParallelQwen2RMSNorm(nn.Module): 27 | def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig): 28 | """ 29 | Qwen2RMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine( 43 | input=hidden_states, 44 | weight=self.weight, 45 | normalized_shape=self.normalized_shape, 46 | eps=self.variance_epsilon, 47 | memory_efficient=True, 48 | ) 49 | -------------------------------------------------------------------------------- /verl/verl/utils/metric/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Metrics utils. 16 | """ 17 | 18 | from typing import Any 19 | 20 | import numpy as np 21 | 22 | 23 | def reduce_metrics(metrics: dict[str, list[Any]]) -> dict[str, Any]: 24 | """ 25 | Reduces a dictionary of metric lists by computing the mean, max, or min of each list. 26 | The reduce operation is determined by the key name: 27 | - If the key contains "max", np.max is used 28 | - If the key contains "min", np.min is used 29 | - Otherwise, np.mean is used 30 | 31 | Args: 32 | metrics: A dictionary mapping metric names to lists of metric values. 33 | 34 | Returns: 35 | A dictionary with the same keys but with each list replaced by its reduced value. 36 | 37 | Example: 38 | >>> metrics = { 39 | ... "loss": [1.0, 2.0, 3.0], 40 | ... "accuracy": [0.8, 0.9, 0.7], 41 | ... "max_reward": [5.0, 8.0, 6.0], 42 | ... "min_error": [0.1, 0.05, 0.2] 43 | ... 
} 44 | >>> reduce_metrics(metrics) 45 | {"loss": 2.0, "accuracy": 0.8, "max_reward": 8.0, "min_error": 0.05} 46 | """ 47 | for key, val in metrics.items(): 48 | if "max" in key: 49 | metrics[key] = np.max(val) 50 | elif "min" in key: 51 | metrics[key] = np.min(val) 52 | else: 53 | metrics[key] = np.mean(val) 54 | return metrics 55 | -------------------------------------------------------------------------------- /inference/scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | PARQUET_FILE_PATH="../data/llmsrbench.parquet" 4 | OUTPUT_JSON_PATH="./output/memory_default/gpt-oss-120b-tool2_0_001_s40_max_25_top3_N1_test.json" 5 | MODEL_PATH="../models/Qwen3-Coder-30B-A3B-Instruct" 6 | 7 | MODEL_URL="http://0.0.0.0:30000/v1" 8 | SANDBOX_URLS=( 9 | "http://127.0.0.1:9010/run_code" 10 | "http://127.0.0.1:9020/run_code" 11 | "http://127.0.0.1:9030/run_code" 12 | "http://127.0.0.1:9040/run_code" 13 | "http://127.0.0.1:9050/run_code" 14 | "http://127.0.0.1:9060/run_code" 15 | "http://127.0.0.1:9070/run_code" 16 | "http://127.0.0.1:9080/run_code" 17 | ) 18 | 19 | 20 | MAPE_THRESHOLD=0.001 21 | NUM_TURNS=5 22 | MAX_ASSISTANT_TURNS=5 23 | TOP_K=3 24 | SOURCE=( 25 | "lsr_synth/bio_pop_growth" 26 | ) 27 | 28 | 29 | conda activate srscientist 30 | cd inference/ 31 | 32 | 33 | TIMESTAMP=$(TZ='UTC-8' date +'%Y%m%d_%H%M%S') 34 | 35 | 36 | BASENAME=$(basename "$OUTPUT_JSON_PATH" .json) 37 | 38 | # 3. Get the output directory and replace "output" with "log" 39 | OUTPUT_DIR=$(dirname "$OUTPUT_JSON_PATH") 40 | LOG_DIR="${OUTPUT_DIR/output/log}" 41 | 42 | # 4. Construct the full log file path 43 | LOG_FILE="${LOG_DIR}/${BASENAME}_${TIMESTAMP}.log" 44 | 45 | # 5. 
Create the log and output directories if they don't exist 46 | mkdir -p "$LOG_DIR" 47 | mkdir -p "$OUTPUT_DIR" 48 | 49 | 50 | 51 | mkdir -p "$(dirname "$OUTPUT_JSON_PATH")" 52 | echo "--- Starting main.py execution at $(TZ='UTC-8' date) ---" | tee -a "$LOG_FILE" 53 | 54 | python main.py \ 55 | --model-name "$MODEL_PATH" \ 56 | --model-url "$MODEL_URL" \ 57 | --sandbox-urls "${SANDBOX_URLS[@]}" \ 58 | --parquet-file-path "$PARQUET_FILE_PATH" \ 59 | --mape-threshold $MAPE_THRESHOLD \ 60 | --num-turns $NUM_TURNS \ 61 | --max-assistant-turns $MAX_ASSISTANT_TURNS \ 62 | --top-k $TOP_K \ 63 | --source "${SOURCE[@]}" \ 64 | --output-json-path "$OUTPUT_JSON_PATH" 2>&1 | tee "$LOG_FILE" 65 | 66 | echo "--- Finished main.py execution at $(TZ='UTC-8' date) ---" | tee -a "$LOG_FILE" 67 | -------------------------------------------------------------------------------- /verl/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | **/*.pt 3 | **/checkpoints 4 | **/wget-log 5 | **/_build/ 6 | **/*.ckpt 7 | **/outputs 8 | **/*.tar.gz 9 | **/playground 10 | **/wandb 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | dataset/* 17 | tensorflow/my_graph/* 18 | .idea/ 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | tmp/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *,cover 60 | .hypothesis/ 61 | pytest.ini 62 | output.txt 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # IPython Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | venv/ 99 | .venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # vscode 109 | .vscode 110 | 111 | # Mac 112 | .DS_Store 113 | 114 | # vim 115 | *.swp 116 | 117 | # ckpt 118 | *.lock 119 | 120 | # data 121 | *.parquet 122 | 123 | 124 | # local logs 125 | logs 126 | log 127 | outputs 128 | .history 129 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
    """Pad the first (token) dimension so the total length is a multiple of
    the tensor-model-parallel world size.

    Args:
        unpad_tokens: (total_nnz, ...). Tokens after removing padding.

    Returns:
        torch.Tensor: the padded tokens, shape (total_nnz + pad_size, ...).
    """
    total_nnz = unpad_tokens.shape[0]
    sp_world_size = mpu.get_tensor_model_parallel_world_size()

    # Amount needed to round total_nnz up to the next multiple of sp_world_size.
    pad_size = 0 if total_nnz % sp_world_size == 0 else sp_world_size - total_nnz % sp_world_size

    if pad_size > 0:
        # F.pad's pad spec is ordered from the LAST dimension forward, so keep
        # every trailing dimension untouched ((0, 0) pairs) and append
        # pad_size zero rows on dim 0. This handles any ndim >= 1, whereas the
        # previous version only supported 1-D/2-D and its error path crashed
        # with TypeError because it called `unpad_tokens.ndim()` — `ndim` is a
        # property, not a method.
        pad_spec = [0, 0] * (unpad_tokens.dim() - 1) + [0, pad_size]
        unpad_tokens = F.pad(unpad_tokens, pad_spec)

    return unpad_tokens
>> "$tmp_header" 24 | echo "" >> "$tmp_header" 25 | 26 | python3 scripts/print_cfg.py --cfg job ${config_arg} > "$tmp_cfg" 27 | 28 | cat "$tmp_header" > "$target_cfg" 29 | sed -n '/^actor_rollout_ref/,$p' "$tmp_cfg" >> "$target_cfg" 30 | 31 | rm "$tmp_cfg" "$tmp_header" 32 | 33 | echo "Generated: $target_cfg" 34 | } 35 | 36 | for spec in "${CONFIG_SPECS[@]}"; do 37 | IFS=':' read -r config_name output_file config_arg <<< "$spec" 38 | generate_config "$config_name" "$output_file" "$config_arg" 39 | done 40 | 41 | for spec in "${CONFIG_SPECS[@]}"; do 42 | IFS=':' read -r config_name output_file config_arg <<< "$spec" 43 | target_cfg="verl/trainer/config/${output_file}" 44 | if ! git diff --exit-code -- "$target_cfg" >/dev/null; then 45 | echo "✖ $target_cfg is out of date. Please regenerate via 'scripts/generate_trainer_config.sh' and commit the changes." 46 | exit 1 47 | fi 48 | done 49 | 50 | echo "All good" 51 | exit 0 52 | -------------------------------------------------------------------------------- /verl/verl/workers/roles/actor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class ActorWorker(Worker):
    """
    This worker can be instantiated as a standalone actor or a standalone rollout or a standalone reference policy
    or a hybrid engine based on the config.rollout
    """

    def __init__(self, config):
        # Stub: a concrete implementation is expected to build its engine
        # from `config` here. Not yet implemented.
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def init_model(self):
        """Initialize the model; registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def update_actor(self, data: DataProto):
        """Run a policy-update step on `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def compute_log_prob(self, data: DataProto):
        """Compute log-probabilities for `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO)
    def compute_ref_log_prob(self, data: DataProto):
        """Compute reference-policy log-probabilities for `data`; registered with Dispatch.DP_COMPUTE_PROTO. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def save_checkpoint(self, local_path, hdfs_path=None, global_step=0, max_ckpt_to_keep=None):
        """Persist a checkpoint to `local_path` (optionally `hdfs_path`); registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError

    @register(dispatch_mode=Dispatch.ONE_TO_ALL)
    def load_checkpoint(self, local_path, hdfs_path=None, del_local_after_load=False):
        """Restore a checkpoint from `local_path`; registered with Dispatch.ONE_TO_ALL. Not yet implemented."""
        raise NotImplementedError
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from importlib.metadata import PackageNotFoundError, version 16 | 17 | from packaging import version as vs 18 | 19 | from verl.utils.import_utils import is_sglang_available 20 | 21 | 22 | def get_version(pkg): 23 | try: 24 | return version(pkg) 25 | except PackageNotFoundError: 26 | return None 27 | 28 | 29 | package_name = "vllm" 30 | package_version = get_version(package_name) 31 | vllm_version = None 32 | 33 | if package_version is None: 34 | if not is_sglang_available(): 35 | raise ValueError( 36 | f"vllm version {package_version} not supported and SGLang also not Found. Currently supported " 37 | f"vllm versions are 0.7.0+" 38 | ) 39 | elif vs.parse(package_version) >= vs.parse("0.7.0"): 40 | vllm_version = package_version 41 | from vllm import LLM 42 | from vllm.distributed import parallel_state 43 | else: 44 | if vs.parse(package_version) in [vs.parse("0.5.4"), vs.parse("0.6.3")]: 45 | raise ValueError( 46 | f"vLLM version {package_version} support has been removed. vLLM 0.5.4 and 0.6.3 are no longer " 47 | f"supported. Please use vLLM 0.7.0 or later." 48 | ) 49 | if not is_sglang_available(): 50 | raise ValueError( 51 | f"vllm version {package_version} not supported and SGLang also not Found. 
class TokenBucket:
    """Thread-safe token-bucket rate limiter.

    The bucket holds at most ``rate_limit`` tokens and refills continuously
    at ``rate_limit`` tokens per second; each successful acquire() consumes
    exactly one token.
    """

    def __init__(self, rate_limit: float):
        # Refill rate in tokens/second; doubles as the bucket capacity.
        self.rate_limit = rate_limit
        # Start with a full bucket.
        self.tokens = rate_limit
        self.last_update = time.time()
        self.lock = threading.Lock()

    def acquire(self) -> bool:
        """Try to take one token; return False when the bucket is empty."""
        with self.lock:
            now = time.time()
            elapsed = now - self.last_update
            # Refill proportionally to the elapsed time, capped at capacity.
            self.tokens = min(self.rate_limit, self.tokens + elapsed * self.rate_limit)
            self.last_update = now
            if self.tokens < 1:
                return False
            self.tokens -= 1
            return True
def mcp2openai(mcp_tool: "Tool") -> dict:
    """Convert a MCP Tool to an OpenAI ChatCompletionTool.

    Args:
        mcp_tool: The MCP tool; its ``name``, ``description`` and
            ``inputSchema`` attributes are read.

    Returns:
        dict: An OpenAI function-tool definition. ``parameters.required`` is
        always present (defaults to an empty list when missing or falsy).
    """
    # Shallow-copy the schema so injecting the "required" default below never
    # mutates the tool's own inputSchema (the previous version shared the
    # reference and wrote into it). The `or {}` guard also tolerates a
    # missing/None schema, which used to raise AttributeError on .get().
    parameters = dict(mcp_tool.inputSchema or {})
    if not parameters.get("required", None):
        parameters["required"] = []
    return {
        "type": "function",
        "function": {
            "name": mcp_tool.name,
            "description": mcp_tool.description,
            "parameters": parameters,
            "strict": False,
        },
    }
30 | """ 31 | 32 | def decorator(cls: type[AbstractRewardManager]) -> type[AbstractRewardManager]: 33 | if name in REWARD_MANAGER_REGISTRY and REWARD_MANAGER_REGISTRY[name] != cls: 34 | raise ValueError( 35 | f"Reward manager {name} has already been registered: {REWARD_MANAGER_REGISTRY[name]} vs {cls}" 36 | ) 37 | REWARD_MANAGER_REGISTRY[name] = cls 38 | return cls 39 | 40 | return decorator 41 | 42 | 43 | def get_reward_manager_cls(name: str) -> type[AbstractRewardManager]: 44 | """Get the reward manager class with a given name. 45 | 46 | Args: 47 | name: `(str)` 48 | The name of the reward manager. 49 | 50 | Returns: 51 | `(type)`: The reward manager class. 52 | """ 53 | if name not in REWARD_MANAGER_REGISTRY: 54 | raise ValueError(f"Unknown reward manager: {name}") 55 | return REWARD_MANAGER_REGISTRY[name] 56 | -------------------------------------------------------------------------------- /verl/verl/trainer/config/reward_model/megatron_reward_model.yaml: -------------------------------------------------------------------------------- 1 | # defaults specify the default config from each component 2 | defaults: 3 | 4 | # dp actor config, inheriting from trainer/config/reward_model/reward_model.yaml 5 | - reward_model 6 | 7 | # load the reference default config, then apply the fields in the current yaml 8 | - _self_ 9 | 10 | strategy: megatron 11 | 12 | # seconds, default is 10 minutes for torch, you can set it to a larger value 13 | # if you have long-running operations like 32B or 72B model using megatron 14 | nccl_timeout: 600 15 | 16 | # Megatron parallelism & checkpointing config 17 | megatron: 18 | 19 | # Target configuration dataclass 20 | _target_: verl.workers.config.MegatronEngineConfig 21 | 22 | # Whether to offload model parameters to CPU 23 | param_offload: False 24 | 25 | # Number of GPUs in tensor model parallel group 26 | tensor_model_parallel_size: 1 27 | 28 | # Number of GPUs in expert model parallel group 29 | expert_model_parallel_size: 1 30 | 
31 | # Expert tensor parallel size 32 | expert_tensor_parallel_size: null 33 | 34 | # Number of pipeline model parallel stages 35 | pipeline_model_parallel_size: 1 36 | 37 | # change VPP interface for parallelism tests 38 | virtual_pipeline_model_parallel_size: null 39 | 40 | # Context parallel size 41 | context_parallel_size: 1 42 | 43 | # Whether to use sequence parallelism 44 | sequence_parallel: True 45 | 46 | # Whether to use distributed optimizer 47 | use_distributed_optimizer: False 48 | 49 | # Whether to enable distributed checkpointing 50 | use_dist_checkpointing: False 51 | 52 | # Path for distributed checkpoints 53 | dist_checkpointing_path: null 54 | 55 | # RNG seed for megatron 56 | seed: ${oc.select:actor_rollout_ref.actor.megatron.seed,42} 57 | 58 | # Any overrides to transformer config 59 | override_transformer_config: ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}} 60 | 61 | # Whether to use mbridge for faster comms 62 | use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False} 63 | 64 | # Whether to load weights (default True) 65 | load_weight: True -------------------------------------------------------------------------------- /verl/verl/models/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Supported models in Megatron-LM
# Architecture -> (module, class).
_MODELS = {
    "LlamaForCausalLM": (
        "llama",
        ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad"),
    ),
    "Qwen2ForCausalLM": (
        "qwen2",
        ("ParallelQwen2ForCausalLMRmPadPP", "ParallelQwen2ForValueRmPadPP", "ParallelQwen2ForCausalLMRmPad"),
    ),
    "MistralForCausalLM": (
        "mistral",
        ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", "ParallelMistralForCausalLMRmPad"),
    ),
}


# return model class
class ModelRegistry:
    """Maps HuggingFace architecture names to Megatron parallel model classes."""

    @staticmethod
    def load_model_cls(model_arch: str, value=False) -> Optional[type[nn.Module]]:
        """Return the parallel model class for ``model_arch``, or None if unknown.

        Args:
            model_arch: HuggingFace architecture name, e.g. "LlamaForCausalLM".
            value: when True return the value-head variant (critic/rm),
                otherwise the causal-LM variant (actor/ref).
        """
        entry = _MODELS.get(model_arch)
        if entry is None:
            return None

        module_name, class_names = entry
        # Index 0 is the actor/ref class, index 1 the critic/rm class.
        cls_name = class_names[1] if value else class_names[0]

        module = importlib.import_module(f"verl.models.{module_name}.megatron.modeling_{module_name}_megatron")
        return getattr(module, cls_name, None)

    @staticmethod
    def get_supported_archs() -> list[str]:
        """Return every architecture name this registry can serve."""
        return list(_MODELS.keys())
For models larger than 7B, it’s recommended to turn on offload for ref by default 2 | strategy: ${actor_rollout_ref.actor.strategy} 3 | 4 | # whether to enable torch.compile 5 | # same as actor_rollout_ref.actor.use_torch_compile if it exists, otherwise 1 6 | use_torch_compile: ${oc.select:actor_rollout_ref.actor.use_torch_compile,true} 7 | 8 | # [Will be deprecated, use log_prob_micro_batch_size_per_gpu] 9 | # The batch size for one forward pass in the computation of log_prob. Global batch size. 10 | log_prob_micro_batch_size: null 11 | 12 | # The batch size for one forward pass in the computation of log_prob. Local batch size per GPU. 13 | log_prob_micro_batch_size_per_gpu: null 14 | 15 | # enable dynamic batch size (sequence packing) for log_prob computation 16 | # same as actor_rollout_ref.actor.use_dynamic_bsz if it exists, otherwise false 17 | log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false} 18 | 19 | # the max token length per GPU 20 | # same as actor_rollout_ref.actor.ppo_max_token_len_per_gpu if it exists, otherwise 16384 21 | log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384} 22 | 23 | # profile the ref model in `compute_log_prob` 24 | profiler: 25 | 26 | # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs 27 | _target_: verl.utils.profiler.ProfilerConfig 28 | 29 | # profiler tool, default same as profiler.tool in global config 30 | # choices: nsys, npu, torch 31 | tool: ${oc.select:global_profiler.tool,null} 32 | 33 | # whether enable profile on ref 34 | enable: ${oc.select:actor_rollout_ref.actor.profiler.enable,false} 35 | 36 | # Whether to profile all ranks. 37 | all_ranks: ${oc.select:actor_rollout_ref.actor.profiler.all_ranks,false} 38 | 39 | # The ranks that will be profiled. [] or [0,1,...] 
40 | ranks: ${oc.select:actor_rollout_ref.actor.profiler.ranks,[]} 41 | 42 | # profile results saving path 43 | save_path: ${oc.select:global_profiler.save_path,null} 44 | 45 | # specific tool config 46 | tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null} -------------------------------------------------------------------------------- /verl/.github/workflows/.deprecate/e2e_prime.yml: -------------------------------------------------------------------------------- 1 | name: e2e_prime_deprecate 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - disabled_ci 9 | pull_request: 10 | branches: 11 | - disabled_ci 12 | paths: 13 | - "**/*.py" 14 | # Other entrypoints 15 | - "!examples/**" 16 | - "!tests/**" 17 | - "!verl/trainer/main_*.py" 18 | - "!verl/trainer/fsdp_sft_trainer.py" 19 | # Other recipes 20 | - "!recipe/**" 21 | # Megatron 22 | - "!verl/workers/**/megatron_*.py" 23 | # Home 24 | - "recipe/prime" 25 | # Entrypoints 26 | - ".github/workflows/e2e_prime.yml" 27 | - "examples/data_preprocess/gsm8k.py" 28 | - "tests/special_e2e/run_prime.sh" 29 | 30 | # Cancel jobs on the same ref if a new one is triggered 31 | concurrency: 32 | group: ${{ github.workflow }}-${{ github.ref }} 33 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 34 | 35 | # Declare permissions just read content. 
36 | permissions: 37 | contents: read 38 | 39 | jobs: 40 | e2e_prime: 41 | runs-on: [L20x8] 42 | timeout-minutes: 50 # Increase this timeout value as needed 43 | env: 44 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 45 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 46 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 47 | HF_ENDPOINT: "https://hf-mirror.com" 48 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 49 | container: 50 | image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3 51 | options: --gpus all --shm-size=10g 52 | steps: 53 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 54 | with: 55 | fetch-depth: 0 56 | - name: Install the current repository 57 | run: | 58 | pip3 install --no-deps -e .[test,gpu] 59 | - name: Prepare gsm8k dataset 60 | run: | 61 | ray stop --force 62 | python3 examples/data_preprocess/gsm8k.py 63 | - name: Running GSM8K E2E with prime alg 64 | run: | 65 | ray stop --force 66 | bash tests/special_e2e/run_prime.sh 67 | -------------------------------------------------------------------------------- /verl/verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Copyright 2025 The Qwen Team and The HuggingFace Inc. team 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
import torch
import torch_npu
from torch_npu import npu_rotary_mul as apply_rotary_emb
from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2RMSNorm


# This patch takes effect when using apply_rotary_pos_emb_flashatt on qwen2_5_vl and will be removed in
# subsequent versions
# https://github.com/huggingface/transformers/pull/38491
def apply_rotary_pos_emb_flashatt_npu(
    q: torch.Tensor, k: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """NPU drop-in for Qwen2.5-VL's flash-attention rotary embedding.

    Keeps only the first half of the cos/sin tables along the last dim, then
    duplicates it, and applies torch_npu's fused `npu_rotary_mul`. The rotary
    math runs in fp32 and the result is cast back to q/k's original dtype.
    """
    # chunk(2)[0] + repeat(1, 2): take the first half of the table and tile it
    # so the layout matches what npu_rotary_mul expects.
    cos = cos.chunk(2, dim=-1)[0].contiguous()
    sin = sin.chunk(2, dim=-1)[0].contiguous()
    cos = cos.repeat(1, 2)
    sin = sin.repeat(1, 2)
    # unsqueeze(0)/unsqueeze(2) broadcast the tables over the batch/head dims
    # — assumes q/k carry those dims in that order; TODO confirm against the
    # upstream qwen2_5_vl call site.
    q_embed = apply_rotary_emb(
        q.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
    ).type_as(q)
    k_embed = apply_rotary_emb(
        k.float(), cos.unsqueeze(0).unsqueeze(2).float(), sin.unsqueeze(0).unsqueeze(2).float()
    ).type_as(k)
    return q_embed, k_embed


# This api can improve performance on ASCEND NPU
def rms_norm_forward(self, x):
    """RMSNorm forward using torch_npu's fused kernel ([0] is the normalized output)."""
    return torch_npu.npu_rms_norm(x, self.weight, epsilon=self.variance_epsilon)[0]


# Module-level monkeypatches: importing this module rewires Qwen2.5-VL to the
# NPU kernels above. Import order matters — this must run before any
# Qwen2RMSNorm forward pass uses the original implementations.
Qwen2RMSNorm.forward = rms_norm_forward
modeling_qwen2_5_vl.apply_rotary_pos_emb_flashatt = apply_rotary_pos_emb_flashatt_npu
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 
def is_ipv4(ip_str: str) -> bool:
    """
    Check if the given string is an IPv4 address

    Args:
        ip_str: The IP address string to check

    Returns:
        bool: Returns True if it's an IPv4 address, False otherwise
    """
    # AddressValueError is what ipaddress raises for malformed addresses.
    try:
        ipaddress.IPv4Address(ip_str)
    except ipaddress.AddressValueError:
        return False
    return True


def is_ipv6(ip_str: str) -> bool:
    """
    Check if the given string is an IPv6 address

    Args:
        ip_str: The IP address string to check

    Returns:
        bool: Returns True if it's an IPv6 address, False otherwise
    """
    try:
        ipaddress.IPv6Address(ip_str)
    except ipaddress.AddressValueError:
        return False
    return True
14 | """ 15 | The base class for Actor 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | import torch 21 | 22 | from verl import DataProto 23 | 24 | __all__ = ["BasePPOActor"] 25 | 26 | 27 | class BasePPOActor(ABC): 28 | def __init__(self, config): 29 | """The base class for PPO actor 30 | 31 | Args: 32 | config (DictConfig): a config passed to the PPOActor. We expect the type to be 33 | DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general. 34 | """ 35 | super().__init__() 36 | self.config = config 37 | 38 | @abstractmethod 39 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 40 | """Compute logits given a batch of data. 41 | 42 | Args: 43 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 44 | ```attention_mask``` and ```position_ids```. 45 | 46 | Returns: 47 | DataProto: a DataProto containing the key ```log_probs``` 48 | 49 | 50 | """ 51 | pass 52 | 53 | @abstractmethod 54 | def update_policy(self, data: DataProto) -> dict: 55 | """Update the policy with an iterator of DataProto 56 | 57 | Args: 58 | data (DataProto): an iterator over the DataProto that returns by 59 | ```make_minibatch_iterator``` 60 | 61 | Returns: 62 | Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model 63 | such as ```loss```, ```grad_norm```, etc,. 64 | 65 | """ 66 | pass 67 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from megatron.core import dist_checkpointing, mpu 16 | from megatron.core.dist_checkpointing.serialization import ( 17 | get_default_load_sharded_strategy, 18 | get_default_save_sharded_strategy, 19 | ) 20 | from megatron.core.dist_checkpointing.strategies.fully_parallel import ( 21 | FullyParallelLoadStrategyWrapper, 22 | FullyParallelSaveStrategyWrapper, 23 | ) 24 | 25 | 26 | def save_dist_checkpointing(sharded_state_dict, ckpt_path, async_save=False): 27 | validate_sharding_integrity = True 28 | # Get checkpointing strategies 29 | save_strategy = get_default_save_sharded_strategy("torch_dist") 30 | save_strategy = FullyParallelSaveStrategyWrapper( 31 | save_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 32 | ) 33 | 34 | # Save model sharded state dicts 35 | async_save_request = dist_checkpointing.save( 36 | sharded_state_dict, 37 | ckpt_path, 38 | sharded_strategy=save_strategy, 39 | async_sharded_save=async_save, 40 | validate_access_integrity=validate_sharding_integrity, 41 | ) 42 | 43 | return async_save_request 44 | 45 | 46 | def load_dist_checkpointing(sharded_state_dict, ckpt_dir): 47 | # Get checkpointing strategies 48 | load_strategy = get_default_load_sharded_strategy(ckpt_dir) 49 | load_strategy = FullyParallelLoadStrategyWrapper( 50 | load_strategy, mpu.get_data_parallel_group(with_context_parallel=True) 51 | ) 52 | 53 | # Load model sharded state dicts 54 | state_dict = dist_checkpointing.load(sharded_state_dict, ckpt_dir, sharded_strategy=load_strategy) 55 | 56 | return state_dict 
57 | -------------------------------------------------------------------------------- /verl/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Borrowed from: https://huggingface.co/spaces/codeparrot/apps_metric/blob/main/utils.py 16 | 17 | import multiprocessing 18 | import os 19 | import sys 20 | import traceback 21 | from typing import Optional 22 | 23 | from .testing_util import run_test 24 | 25 | 26 | def _temp_run(sample, generation, debug, result, metadata_list, timeout): 27 | with open(os.devnull, "w") as devnull: 28 | sys.stdout = devnull 29 | sys.stderr = devnull 30 | try: 31 | res, metadata = run_test(in_outs=sample, test=generation, debug=debug, timeout=timeout) 32 | result.append(res) 33 | metadata_list.append(metadata) 34 | except Exception: 35 | # print(e) # some tracebacks are extremely long. 36 | traceback.print_exc(10) 37 | result.append([-1 for i in range(len(sample["inputs"]))]) 38 | metadata_list.append({}) 39 | 40 | 41 | def check_correctness(in_outs: Optional[dict], generation, timeout=10, debug=True): 42 | """Check correctness of code generation with a global timeout. 
43 | The global timeout is to catch some extreme/rare cases not handled by the timeouts 44 | inside `run_test`""" 45 | 46 | manager = multiprocessing.Manager() 47 | result = manager.list() 48 | metadata_list = manager.list() 49 | p = multiprocessing.Process(target=_temp_run, args=(in_outs, generation, debug, result, metadata_list, timeout)) 50 | p.start() 51 | p.join(timeout=timeout + 1) 52 | if p.is_alive(): 53 | p.kill() 54 | # p.terminate() 55 | if not result: 56 | # consider that all tests failed 57 | result = [[-1 for i in range(len(in_outs["inputs"]))]] 58 | if debug: 59 | print("global timeout") 60 | return result[0], metadata_list 61 | -------------------------------------------------------------------------------- /verl/examples/sglang_multiturn/tool_config.yaml: -------------------------------------------------------------------------------- 1 | tools: 2 | - class_name: "verl.tools.equation_evaluator_tool.EquationEvaluatorTool" 3 | config: 4 | sandbox_fusion_urls: 5 | - "http://0.0.0.0:8010/run_code" 6 | - "http://0.0.0.0:8020/run_code" 7 | - "http://0.0.0.0:8030/run_code" 8 | - "http://0.0.0.0:8040/run_code" 9 | - "http://0.0.0.0:8050/run_code" 10 | - "http://0.0.0.0:8060/run_code" 11 | - "http://0.0.0.0:8070/run_code" 12 | - "http://0.0.0.0:8080/run_code" 13 | default_timeout: 30 14 | default_language: "python" 15 | memory_limit_mb: 1024 16 | type: native 17 | 18 | tool_schema: 19 | type: "function" 20 | function: 21 | name: "equation_evaluator" 22 | description: "Accepts a mathematical equation as a Python function string, optimizes its parameters to fit a dataset using the BFGS method, and returns performance metrics (MSE, NMSE, MAPE) to evaluate its goodness of fit." 23 | parameters: 24 | type: "object" 25 | properties: 26 | equation: 27 | type: "string" 28 | description: "The equation to evaluate, provided as a complete Python function string." 
29 | required: ["equation"] 30 | 31 | 32 | - class_name: "verl.tools.data_analyzer_tool.DataAnalyzerTool" 33 | config: 34 | sandbox_fusion_urls: 35 | - "http://0.0.0.0:8010/run_code" 36 | - "http://0.0.0.0:8020/run_code" 37 | - "http://0.0.0.0:8030/run_code" 38 | - "http://0.0.0.0:8040/run_code" 39 | - "http://0.0.0.0:8050/run_code" 40 | - "http://0.0.0.0:8060/run_code" 41 | - "http://0.0.0.0:8070/run_code" 42 | - "http://0.0.0.0:8080/run_code" 43 | default_timeout: 30 44 | default_language: "python" 45 | memory_limit_mb: 1024 46 | type: native 47 | 48 | tool_schema: 49 | type: "function" 50 | function: 51 | name: "data_analyzer" 52 | description: "Executes Python code for data analysis and exploration on a given dataset to inspect for relationships or anomalies. This tool does not support data visualization or plotting libraries like Matplotlib." 53 | parameters: 54 | type: "object" 55 | properties: 56 | code: 57 | type: "string" 58 | description: "The Python code snippet for data analysis to execute." 59 | required: ["code"] 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /verl/.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | ### Adding a New Workflow 2 | 3 | When adding a new workflow for continuous integration (CI), you have two runner options: a fixed runner or a machine from the vemlp. 4 | 5 | - **Fixed Runner**: To use a fixed runner, specify it in your workflow using the `runs-on` keyword, like `runs-on: [L20x8]`. 6 | - **Vemlp Runner**: Opting for a Vemlp machine allows you to launch tasks elastically. 7 | 8 | Here is a template to assist you. This template is designed for using Vemlp machines. Currently, for each workflow, you need to create a `setup` and a `cleanup` job. When using this template, the main parts you need to modify are the `IMAGE` environment variable and the specific `job steps`. 
9 | 10 | ```yaml 11 | name: Your Default Workflow 12 | 13 | on: 14 | push: 15 | branches: 16 | - main 17 | - v0.* 18 | pull_request: 19 | branches: 20 | - main 21 | - v0.* 22 | paths: 23 | - "**/*.py" 24 | - ".github/workflows/template.yml" 25 | 26 | concurrency: 27 | group: ${{ github.workflow }}-${{ github.ref }} 28 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 29 | 30 | permissions: 31 | contents: read 32 | 33 | env: 34 | IMAGE: "your vemlp image" # e.g. "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.4-vllm0.8.5-mcore0.12.2" 35 | DYNAMIC_RUNNER_URL: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner" # public veFaas api 36 | 37 | jobs: 38 | setup: 39 | if: github.repository_owner == 'volcengine' 40 | runs-on: ubuntu-latest 41 | outputs: 42 | runner-label: ${{ steps.create-runner.outputs.runner-label }} 43 | task-id: ${{ steps.create-runner.outputs.task-id }} 44 | steps: 45 | - uses: actions/checkout@v4 46 | - id: create-runner 47 | uses: volcengine/vemlp-github-runner@v1 48 | with: 49 | mode: "create" 50 | faas-url: "${{ env.DYNAMIC_RUNNER_URL }}" 51 | image: "${{ env.DEFAULT_IMAGE }}" 52 | 53 | your_job: 54 | needs: setup 55 | runs-on: ["${{ needs.setup.outputs.runner-label || 'default-runner' }}"] 56 | steps: 57 | xxxx # your jobs 58 | 59 | cleanup: 60 | runs-on: ubuntu-latest 61 | needs: [setup, your_job] 62 | if: always() 63 | steps: 64 | - id: destroy-runner 65 | uses: volcengine/vemlp-github-runner@v1 66 | with: 67 | mode: "destroy" 68 | faas-url: "${{ env.DYNAMIC_RUNNER_URL }}" 69 | task-id: "${{ needs.setup.outputs.task-id }}" -------------------------------------------------------------------------------- /verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib 16 | import logging 17 | import os 18 | from importlib.metadata import PackageNotFoundError 19 | from importlib.metadata import version as get_version 20 | 21 | from packaging.version import parse as parse_version 22 | 23 | from .protocol import DataProto 24 | from .utils.device import is_npu_available 25 | from .utils.logging_utils import set_basic_config 26 | 27 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 28 | 29 | with open(os.path.join(version_folder, "version/version")) as f: 30 | __version__ = f.read().strip() 31 | 32 | 33 | set_basic_config(level=logging.WARNING) 34 | 35 | 36 | __all__ = ["DataProto", "__version__"] 37 | 38 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true": 39 | if importlib.util.find_spec("modelscope") is None: 40 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`") 41 | # Patch hub to download models from modelscope to speed up. 
42 | from modelscope.utils.hf_util import patch_hub 43 | 44 | patch_hub() 45 | 46 | if is_npu_available: 47 | from .models.transformers import npu_patch as npu_patch 48 | 49 | package_name = "transformers" 50 | required_version_spec = "4.52.4" 51 | try: 52 | installed_version = get_version(package_name) 53 | installed = parse_version(installed_version) 54 | required = parse_version(required_version_spec) 55 | 56 | if installed < required: 57 | raise ValueError( 58 | f"{package_name} version >= {required_version_spec} is required on ASCEND NPU, current version is " 59 | f"{installed}." 60 | ) 61 | except PackageNotFoundError as e: 62 | raise ImportError( 63 | f"package {package_name} is not installed, please run pip install {package_name}=={required_version_spec}" 64 | ) from e 65 | -------------------------------------------------------------------------------- /verl/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Adapted from Cruise. 16 | """ 17 | 18 | import torch 19 | 20 | HALF_LIST = [16, "16", "fp16", "float16", torch.float16] 21 | FLOAT_LIST = [32, "32", "fp32", "float32", torch.float32] 22 | BFLOAT_LIST = ["bf16", "bfloat16", torch.bfloat16] 23 | 24 | 25 | class PrecisionType: 26 | """Type of precision used. 
27 | 28 | >>> PrecisionType.HALF == 16 29 | True 30 | >>> PrecisionType.HALF in (16, "16") 31 | True 32 | """ 33 | 34 | HALF = "16" 35 | FLOAT = "32" 36 | FULL = "64" 37 | BFLOAT = "bf16" 38 | MIXED = "mixed" 39 | 40 | @staticmethod 41 | def supported_type(precision: str | int) -> bool: 42 | return any(x == precision for x in PrecisionType) 43 | 44 | @staticmethod 45 | def supported_types() -> list[str]: 46 | return [x.value for x in PrecisionType] 47 | 48 | @staticmethod 49 | def is_fp16(precision): 50 | return precision in HALF_LIST 51 | 52 | @staticmethod 53 | def is_fp32(precision): 54 | return precision in FLOAT_LIST 55 | 56 | @staticmethod 57 | def is_bf16(precision): 58 | return precision in BFLOAT_LIST 59 | 60 | @staticmethod 61 | def to_dtype(precision): 62 | if precision in HALF_LIST: 63 | return torch.float16 64 | elif precision in FLOAT_LIST: 65 | return torch.float32 66 | elif precision in BFLOAT_LIST: 67 | return torch.bfloat16 68 | else: 69 | raise RuntimeError(f"unexpected precision: {precision}") 70 | 71 | @staticmethod 72 | def to_str(precision): 73 | if precision == torch.float16: 74 | return "fp16" 75 | elif precision == torch.float32: 76 | return "fp32" 77 | elif precision == torch.bfloat16: 78 | return "bf16" 79 | else: 80 | raise RuntimeError(f"unexpected precision: {precision}") 81 | -------------------------------------------------------------------------------- /verl/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def get_weight_loader(arch: str): 17 | from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel 18 | 19 | _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = { 20 | "LlamaForCausalLM": load_state_dict_to_megatron_gptmodel, 21 | "Qwen2ForCausalLM": load_state_dict_to_megatron_gptmodel, 22 | } 23 | 24 | if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY: 25 | return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch] 26 | raise ValueError( 27 | f"Model architectures {arch} loader are not supported for now. Supported architectures: " 28 | f"{_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}" 29 | ) 30 | 31 | 32 | def get_weight_saver(arch: str): 33 | from verl.models.mcore.saver import ( 34 | merge_megatron_ckpt_gptmodel, 35 | merge_megatron_ckpt_gptmodel_dpskv3, 36 | merge_megatron_ckpt_gptmodel_mixtral, 37 | merge_megatron_ckpt_gptmodel_qwen2_5_vl, 38 | merge_megatron_ckpt_gptmodel_qwen_moe, 39 | ) 40 | 41 | _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY = { 42 | "LlamaForCausalLM": merge_megatron_ckpt_gptmodel, 43 | "Qwen2ForCausalLM": merge_megatron_ckpt_gptmodel, 44 | "MixtralForCausalLM": merge_megatron_ckpt_gptmodel_mixtral, 45 | "Qwen2MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 46 | "Qwen2_5_VLForConditionalGeneration": merge_megatron_ckpt_gptmodel_qwen2_5_vl, 47 | "DeepseekV3ForCausalLM": merge_megatron_ckpt_gptmodel_dpskv3, 48 | "Qwen3ForCausalLM": merge_megatron_ckpt_gptmodel, 49 | "Qwen3MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe, 50 | } 51 | if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY: 52 
| return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch] 53 | raise ValueError( 54 | f"Model architectures {arch} saver are not supported for now. Supported architectures: " 55 | f"{_MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY.keys()}" 56 | ) 57 | -------------------------------------------------------------------------------- /verl/verl/workers/engine/fsdp/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from torch.distributed.device_mesh import init_device_mesh 15 | 16 | from verl.utils.device import get_device_name 17 | 18 | 19 | def create_device_mesh(world_size, fsdp_size): 20 | """ 21 | Create a device mesh for distributed training based on the world size and FSDP size. 22 | 23 | Args: 24 | world_size (int): Total number of processes in the distributed training setup. 25 | fsdp_size (int): Size of the Fully Sharded Data Parallel (FSDP) group. 26 | 27 | Returns: 28 | torch.distributed.device_mesh.DeviceMesh: The initialized device mesh. 
29 | """ 30 | device_name = get_device_name() 31 | if fsdp_size < 0 or fsdp_size >= world_size: 32 | device_mesh = init_device_mesh(device_name, mesh_shape=(world_size,), mesh_dim_names=["fsdp"]) 33 | else: 34 | device_mesh = init_device_mesh( 35 | device_name, mesh_shape=(world_size // fsdp_size, fsdp_size), mesh_dim_names=["ddp", "fsdp"] 36 | ) 37 | return device_mesh 38 | 39 | 40 | def get_sharding_strategy(device_mesh): 41 | """ 42 | Determine the appropriate sharding strategy based on the number of dimensions of the device mesh. 43 | 44 | Args: 45 | device_mesh (torch.distributed.device_mesh.DeviceMesh): The device mesh used for distributed training. 46 | 47 | Returns: 48 | torch.distributed.fsdp.ShardingStrategy: The sharding strategy to be used with FSDP. 49 | 50 | Raises: 51 | NotImplementedError: If the number of dimensions of the device mesh is neither 1 nor 2. 52 | """ 53 | from torch.distributed.fsdp import ShardingStrategy 54 | 55 | if device_mesh.ndim == 1: 56 | sharding_strategy = ShardingStrategy.FULL_SHARD 57 | elif device_mesh.ndim == 2: 58 | sharding_strategy = ShardingStrategy.HYBRID_SHARD 59 | else: 60 | raise NotImplementedError(f"Get device mesh ndim={device_mesh.ndim}, but only support 1 or 2") 61 | return sharding_strategy 62 | -------------------------------------------------------------------------------- /verl/verl/model_merger/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | This module is used to merge huggingface model and test verl checkpoints from FSDP and Megatron backends. 17 | 18 | To merge FSDP checkpoints: 19 | ```sh 20 | python -m verl.model_merger merge \ 21 | --backend fsdp \ 22 | --local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \ 23 | --target_dir /path/to/merged_hf_model 24 | ``` 25 | 26 | To merge Megatron checkpoints: 27 | ```sh 28 | python -m verl.model_merger merge \ 29 | --backend megatron \ 30 | --tie-word-embedding \ 31 | --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \ 32 | --target_dir /path/to/merged_hf_model 33 | ``` 34 | 35 | or use distribtued merge for large models like dpskv3 671B 36 | 37 | ```sh 38 | torchrun --nproc_per_node 1 --nnodes 8 --node_rank ${RANK} -m verl.model_merger merge\ 39 | --backend megatron \ 40 | --local_dir ./checkpoints/global_step_1/actor \ 41 | --target_dir /path/to/merged_hf_model 42 | ``` 43 | 44 | 45 | For more details, please refer to documentation: 46 | https://verl.readthedocs.io/en/latest/advance/checkpoint.html#convert-fsdp-and-megatron-checkpoints-to-huggingface-format-model 47 | """ 48 | 49 | from .base_model_merger import generate_config_from_args, parse_args 50 | 51 | 52 | def main(): 53 | args = parse_args() 54 | config = generate_config_from_args(args) 55 | print(f"config: {config}") 56 | 57 | if config.backend == "fsdp": 58 | from .fsdp_model_merger import FSDPModelMerger 59 | 60 | merger = 
FSDPModelMerger(config) 61 | elif config.backend == "megatron": 62 | from .megatron_model_merger import MegatronModelMerger 63 | 64 | merger = MegatronModelMerger(config) 65 | else: 66 | raise NotImplementedError(f"Unknown backend: {config.backend}") 67 | 68 | merger.merge_and_save() 69 | merger.cleanup() 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /verl/scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | USE_MEGATRON=${USE_MEGATRON:-1} 4 | USE_SGLANG=${USE_SGLANG:-1} 5 | 6 | export MAX_JOBS=32 7 | 8 | echo "1. install inference frameworks and pytorch they need" 9 | if [ $USE_SGLANG -eq 1 ]; then 10 | pip install "sglang[all]==0.4.6.post1" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir 11 | fi 12 | pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata 13 | 14 | echo "2. install basic packages" 15 | pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \ 16 | "numpy<2.0.0" "pyarrow>=15.0.0" pandas \ 17 | ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \ 18 | pytest py-spy pyext pre-commit ruff tensorboard 19 | 20 | pip install "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1" 21 | 22 | 23 | echo "3. 
install FlashAttention and FlashInfer" 24 | # Install flash-attn-2.7.4.post1 (cxx11abi=False) 25 | wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \ 26 | pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl 27 | 28 | # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False) 29 | # vllm-0.8.3 does not support flashinfer>=0.2.3 30 | # see https://github.com/vllm-project/vllm/pull/15777 31 | wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \ 32 | pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl 33 | 34 | 35 | if [ $USE_MEGATRON -eq 1 ]; then 36 | echo "4. install TransformerEngine and Megatron" 37 | echo "Notice that TransformerEngine installation can take very long time, please be patient" 38 | NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1 39 | pip3 install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2 40 | fi 41 | 42 | 43 | echo "5. May need to fix opencv" 44 | pip install opencv-python 45 | pip install opencv-fixer && \ 46 | python -c "from opencv_fixer import AutoFix; AutoFix()" 47 | 48 | 49 | if [ $USE_MEGATRON -eq 1 ]; then 50 | echo "6. 
Install cudnn python package (avoid being overridden)" 51 | pip install nvidia-cudnn-cu12==9.8.0.87 52 | fi 53 | 54 | echo "Successfully installed all packages" 55 | -------------------------------------------------------------------------------- /verl/.github/workflows/e2e_sppo.yml: -------------------------------------------------------------------------------- 1 | name: e2e_sppo 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.* 10 | paths: 11 | - "**/*.py" 12 | # Other entrypoints 13 | - "!examples/**" 14 | - "!tests/**" 15 | - "!verl/trainer/main_*.py" 16 | - "!verl/trainer/fsdp_sft_trainer.py" 17 | # Other recipes 18 | - "!recipe/**" 19 | # Megatron 20 | - "!verl/workers/**/megatron_*.py" 21 | # Home 22 | - "recipe/sppo" 23 | # Entrypoints 24 | - ".github/workflows/e2e_sppo.yml" 25 | - "examples/data_preprocess/gsm8k.py" 26 | - "tests/special_e2e/run_sppo.sh" 27 | pull_request: 28 | branches: 29 | - main 30 | - v0.* 31 | paths: 32 | - "**/*.py" 33 | # Other entrypoints 34 | - "!examples/**" 35 | - "!tests/**" 36 | - "!verl/trainer/main_*.py" 37 | - "!verl/trainer/fsdp_sft_trainer.py" 38 | # Other recipes 39 | - "!recipe/**" 40 | # Megatron 41 | - "!verl/workers/**/megatron_*.py" 42 | # Home 43 | - "recipe/sppo" 44 | # Entrypoints 45 | - ".github/workflows/e2e_sppo.yml" 46 | - "examples/data_preprocess/gsm8k.py" 47 | - "tests/special_e2e/run_sppo.sh" 48 | 49 | # Declare permissions just read content. 
50 | permissions: 51 | contents: read 52 | 53 | # Cancel jobs on the same ref if a new one is triggered 54 | concurrency: 55 | group: ${{ github.workflow }}-${{ github.ref }} 56 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 57 | 58 | jobs: 59 | e2e_sppo: 60 | runs-on: [L20x8] 61 | timeout-minutes: 40 # Increase this timeout value as needed 62 | env: 63 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 64 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 65 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 66 | HF_ENDPOINT: "https://hf-mirror.com" 67 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 68 | container: 69 | image: verlai/verl:app-verl0.5-sglang0.4.9.post6-mcore0.12.2-te2.2 70 | options: --gpus all --shm-size=10g 71 | steps: 72 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 73 | with: 74 | fetch-depth: 0 75 | - name: Install the current repository 76 | run: | 77 | pip3 install -e .[test,gpu,sglang] 78 | - name: Prepare MATH dataset 79 | run: | 80 | python3 examples/data_preprocess/math_dataset.py 81 | - name: Running the E2E test with the SPPO algorithm 82 | run: | 83 | ray stop --force 84 | bash tests/special_e2e/run_sppo.sh 85 | -------------------------------------------------------------------------------- /verl/verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import logging 15 | import os 16 | from typing import Any 17 | from uuid import uuid4 18 | 19 | from verl.experimental.agent_loop.agent_loop import AgentLoopBase, AgentLoopOutput, register 20 | from verl.utils.profiler import simple_timer 21 | 22 | logger = logging.getLogger(__file__) 23 | logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) 24 | 25 | 26 | @register("single_turn_agent") 27 | class SingleTurnAgentLoop(AgentLoopBase): 28 | """Naive agent loop that only do single turn chat completion.""" 29 | 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | self.prompt_length = self.config.actor_rollout_ref.rollout.prompt_length 33 | self.response_length = self.config.actor_rollout_ref.rollout.response_length 34 | self.apply_chat_template_kwargs = self.config.data.get("apply_chat_template_kwargs", {}) 35 | 36 | async def run(self, sampling_params: dict[str, Any], **kwargs) -> AgentLoopOutput: 37 | messages = list(kwargs["raw_prompt"]) 38 | 39 | metrics = {} 40 | request_id = uuid4().hex 41 | prompt_ids = await self.loop.run_in_executor( 42 | None, 43 | lambda: self.tokenizer.apply_chat_template( 44 | messages, add_generation_prompt=True, tokenize=True, **self.apply_chat_template_kwargs 45 | ), 46 | ) 47 | 48 | with simple_timer("generate_sequences", metrics): 49 | response_ids = await self.server_manager.generate( 50 | request_id=request_id, prompt_ids=prompt_ids, sampling_params=sampling_params 51 | ) 52 | response_mask = [1] * len(response_ids) 53 | 54 | output = AgentLoopOutput( 55 | prompt_ids=prompt_ids, 56 | response_ids=response_ids[: self.response_length], 57 | response_mask=response_mask[: self.response_length], 58 | multi_modal_data={}, 59 | num_turns=2, 60 | metrics=metrics, 61 | ) 62 | return output 63 | -------------------------------------------------------------------------------- 
/verl/verl/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- 1 | # Format checks enforced on CI: 2 | # 1. Comments must appear above each field. 3 | # 2. There must be a blank line between each field. 4 | # 3. Inline comments (after a field on the same line) are not allowed. 5 | # 4. Indentation level is respected for nested fields. 6 | 7 | # defaults specify the default config from each component 8 | defaults: 9 | 10 | # dp actor config, inheriting from trainer/config/actor/actor.yaml 11 | - actor 12 | 13 | # load the reference default config, then apply the fields in the current yaml 14 | - _self_ 15 | 16 | # Target class for this configuration 17 | _target_: verl.workers.config.FSDPActorConfig 18 | 19 | # TODO(haibin.lin): switch to fsdp2 20 | strategy: fsdp 21 | 22 | # Gradient clipping for actor updates, specific to the strategy. 23 | grad_clip: 1.0 24 | 25 | # Sequence parallelism size for Ulysses-style model parallelism 26 | # oc.select: the default val for ref.ulysses_sequence_parallel_size 27 | ulysses_sequence_parallel_size: 1 28 | 29 | # calculate entropy with chunking to reduce memory peak 30 | entropy_from_logits_with_chunking: False 31 | 32 | # recompute entropy 33 | entropy_checkpointing: False 34 | 35 | # optimizer configs 36 | optim: 37 | 38 | # Target class for this configuration 39 | _target_: verl.workers.config.FSDPOptimizerConfig 40 | 41 | # Minimum LR ratio for cosine schedule 42 | min_lr_ratio: 0.0 43 | 44 | # Number of cosine cycles in LR schedule 45 | num_cycles: 0.5 46 | 47 | # LR warmup style: "constant" or "cosine" 48 | warmup_style: constant 49 | 50 | # configs for FSDP 51 | fsdp_config: 52 | 53 | # Target class for this configuration 54 | _target_: verl.workers.config.FSDPEngineConfig 55 | 56 | # policy for wrapping the model 57 | wrap_policy: 58 | 59 | # Minimum number of parameters to trigger wrapping a layer with FSDP 60 | min_num_params: 0 61 | 62 | # Whether to 
offload model parameters to CPU (trades speed for memory) 63 | param_offload: false 64 | 65 | # Whether to offload optimizer state to CPU 66 | optimizer_offload: false 67 | 68 | # Only for FSDP2: offload param/grad/optimizer during train 69 | offload_policy: false 70 | 71 | # Only for FSDP2: Reshard after forward pass to reduce memory footprint 72 | reshard_after_forward: true 73 | 74 | # Number of GPUs in each FSDP shard group; -1 means auto 75 | fsdp_size: -1 76 | 77 | # Only for FSDP1: FSDP1 configuration, prefetch the next forward-pass all-gather 78 | # before the current forward computation. 79 | forward_prefetch: False 80 | 81 | # Whether to remove padding tokens in inputs during training 82 | use_remove_padding: ${oc.select:actor_rollout_ref.model.use_remove_padding,false} 83 | -------------------------------------------------------------------------------- /verl/.github/workflows/check-pr-title.yml: -------------------------------------------------------------------------------- 1 | # # Tests layout 2 | 3 | # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance: 4 | # - `tests/trainer` for testing functionality related to `verl/trainer` 5 | # - `tests/models` for testing functionality related to `verl/models` 6 | # - ... 7 | 8 | # There are a few folders with `special_` prefix, created for special purposes: 9 | # - `special_distributed`: unit tests that must run with multiple GPUs 10 | # - `special_e2e`: end-to-end tests with training/generation scripts 11 | # - `special_npu`: tests for NPUs 12 | # - `special_sanity`: a suite of quick sanity tests 13 | # - `special_standalone`: a set of test that are designed to run in dedicated environments 14 | 15 | # Accelerators for tests 16 | # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`. 
17 | # - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment. 18 | 19 | # # Workflow layout 20 | 21 | # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs: 22 | # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml` 23 | # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml` 24 | # 3. End-to-end tests: `e2e_*.yml` 25 | # 4. Unit tests 26 | # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py` 27 | # - `gpu_unit_tests.yml`, run pytest on all test scripts without the `on_cpu.py` suffix. 28 | # - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when 29 | # - new workflow yaml is added to `.github/workflows` 30 | # - new tests are added to workflow mentioned in 2.
31 | 32 | 33 | on: 34 | pull_request: 35 | types: [opened, edited, synchronize] 36 | 37 | jobs: 38 | check-title: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - name: Checkout code 42 | uses: actions/checkout@v4 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v5 46 | with: 47 | python-version: '3.11' 48 | 49 | - name: Run PR title checker 50 | run: python3 tests/special_sanity/check_pr_title.py 51 | env: 52 | PR_TITLE: ${{ github.event.pull_request.title }} 53 | 54 | - name: Run PR description checker 55 | run: python3 tests/special_sanity/check_pr_description.py 56 | env: 57 | PR_TITLE: ${{ github.event.pull_request.title }} 58 | GITHUB_EVENT_PATH: ${{ github.event_path }} 59 | -------------------------------------------------------------------------------- /verl/.github/workflows/e2e_spin.yml: -------------------------------------------------------------------------------- 1 | name: e2e_spin 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.* 10 | paths: 11 | - "**/*.py" 12 | # Other entrypoints 13 | - "!examples/**" 14 | - "!tests/**" 15 | - "!verl/trainer/main_*.py" 16 | - "!verl/trainer/fsdp_sft_trainer.py" 17 | # Other recipes 18 | - "!recipe/**" 19 | # Megatron 20 | - "!verl/workers/**/megatron_*.py" 21 | # Home 22 | - "recipe/spin" 23 | # Entrypoints 24 | - ".github/workflows/e2e_spin.yml" 25 | - "examples/data_preprocess/gsm8k.py" 26 | - "tests/special_e2e/run_spin.sh" 27 | - "!examples" 28 | pull_request: 29 | branches: 30 | - main 31 | - v0.* 32 | paths: 33 | - "**/*.py" 34 | # Other entrypoints 35 | - "!examples/**" 36 | - "!tests/**" 37 | - "!verl/trainer/main_*.py" 38 | - "!verl/trainer/fsdp_sft_trainer.py" 39 | # Other recipes 40 | - "!recipe/**" 41 | # Megatron 42 | - "!verl/workers/**/megatron_*.py" 43 | # Home 44 | - "recipe/spin" 45 | # Entrypoints 46 | - ".github/workflows/e2e_spin.yml" 47 | - "examples/data_preprocess/gsm8k.py" 48 | - 
"tests/special_e2e/run_spin.sh" 49 | - "!examples" 50 | 51 | # Declare permissions just read content. 52 | permissions: 53 | contents: read 54 | 55 | # Cancel jobs on the same ref if a new one is triggered 56 | concurrency: 57 | group: ${{ github.workflow }}-${{ github.ref }} 58 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 59 | 60 | jobs: 61 | e2e_spin: 62 | runs-on: [L20x8] 63 | timeout-minutes: 40 # Increase this timeout value as needed 64 | env: 65 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 66 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 67 | NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" 68 | HF_ENDPOINT: "https://hf-mirror.com" 69 | HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable 70 | container: 71 | image: verlai/verl:app-verl0.5-sglang0.4.9.post6-mcore0.12.2-te2.2 72 | options: --gpus all --shm-size=10g 73 | steps: 74 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 75 | with: 76 | fetch-depth: 0 77 | - name: Install the current repository 78 | run: | 79 | pip3 install -e .[test,gpu,sglang] 80 | - name: Prepare GSM8K dataset 81 | run: | 82 | python3 examples/data_preprocess/gsm8k.py 83 | - name: Running the E2E test with the spin algorithm 84 | run: | 85 | ray stop --force 86 | bash tests/special_e2e/run_spin.sh 87 | -------------------------------------------------------------------------------- /verl/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### What does this PR do? 2 | 3 | > Add **concise** overview of what this PR aims to achieve or accomplish. Reference related GitHub issues and PRs that help with the review. 4 | 5 | ### Checklist Before Starting 6 | 7 | - [ ] Search for similar PRs. Paste at least one query link here: ... 
8 | - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) 9 | - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` 10 | - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` 11 | - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` 12 | - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. 13 | - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` 14 | 15 | ### Test 16 | 17 | > For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc. 18 | 19 | ### API and Usage Example 20 | 21 | > Demonstrate how the API changes if any, and provide usage example(s) if possible. 22 | 23 | ```python 24 | # Add code snippet or script demonstrating how to use this 25 | ``` 26 | 27 | ### Design & Code Changes 28 | 29 | > Demonstrate the high-level design if this PR is complex, and list the specific changes. 30 | 31 | ### Checklist Before Submitting 32 | 33 | > [!IMPORTANT] 34 | > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. 35 | 36 | - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). 37 | - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` 38 | - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). 
39 | - [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... 40 | - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).) 41 | -------------------------------------------------------------------------------- /verl/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/linear.py


from megatron.core import tensor_parallel


class QKVParallelLinear(tensor_parallel.ColumnParallelLinear):
    """Column-parallel linear that projects hidden states into fused Q/K/V.

    The fused output dimension is (num_heads + 2 * num_key_value_heads) * head_dim,
    i.e. all query heads plus one key block and one value block (GQA-friendly).
    """

    def __init__(
        self,
        input_size,
        num_heads,
        num_key_value_heads,
        head_dim,
        *,
        bias=True,
        gather_output=True,
        skip_bias_add=False,
        **kwargs,
    ):
        # Stash the head geometry before the parent initializer runs; callers
        # use q_output_size / kv_output_size to split the fused projection.
        self.input_size = input_size
        self.q_output_size = num_heads * head_dim
        self.kv_output_size = num_key_value_heads * head_dim
        self.head_dim = head_dim
        self.gather_output = gather_output
        self.skip_bias_add = skip_bias_add

        fused_output_size = (num_heads + 2 * num_key_value_heads) * head_dim
        super().__init__(
            input_size=input_size,
            output_size=fused_output_size,
            bias=bias,
            gather_output=gather_output,
            skip_bias_add=skip_bias_add,
            **kwargs,
        )


class MergedColumnParallelLinear(tensor_parallel.ColumnParallelLinear):
    """Column-parallel linear fusing the gate and up projections of an MLP."""

    # NOTE(review): the parameter name `gate_ouput_size` carries a typo but is
    # part of the public keyword interface, so it is kept for compatibility.
    def __init__(
        self,
        input_size,
        gate_ouput_size,
        up_output_size,
        *,
        bias=True,
        gather_output=True,
        skip_bias_add=False,
        **kwargs,
    ):
        # Record the merged sizes before the parent initializer runs.
        self.input_size = input_size
        self.output_size = gate_ouput_size + up_output_size
        self.gather_output = gather_output
        self.skip_bias_add = skip_bias_add

        super().__init__(
            input_size=self.input_size,
            output_size=self.output_size,
            bias=bias,
            gather_output=gather_output,
            skip_bias_add=skip_bias_add,
            **kwargs,
        )
import logging
import time

import ray
from cupy.cuda.nccl import NcclCommunicator, get_unique_id
from ray.util import list_named_actors


@ray.remote
class NCCLIDStore:
    """Named Ray actor that holds a NCCL unique id for peer ranks to fetch."""

    def __init__(self, nccl_id):
        self._nccl_id = nccl_id

    def get(self):
        """Return the stored NCCL unique id."""
        return self._nccl_id


def get_nccl_id_store_by_name(name):
    """Find the NCCLIDStore actor registered under ``name``.

    Args:
        name: Actor name used when the store was created.

    Returns:
        An actor handle if exactly one actor matches, otherwise None
        (ambiguous or missing names are logged).
    """
    all_actors = list_named_actors(all_namespaces=True)
    matched_actors = [actor for actor in all_actors if actor.get("name", None) == name]
    if len(matched_actors) == 1:
        # The dict carries name/namespace keys, which ray.get_actor accepts as kwargs.
        return ray.get_actor(**matched_actors[0])
    if len(matched_actors) > 1:
        logging.warning("multiple actors with same name found: %s", matched_actors)
    else:
        logging.info("failed to get any actor named %s", name)
    return None


def create_nccl_communicator_in_ray(
    rank: int, world_size: int, group_name: str, max_retries: int = 100, interval_s: int = 5
):
    """Rendezvous helper that builds a NCCL communicator via a named Ray actor.

    Rank 0 generates the NCCL unique id and publishes it through an
    ``NCCLIDStore`` actor named ``group_name``; the other ranks poll for the
    actor, fetch the id, and join the communicator.

    Args:
        rank: This process's rank within the group.
        world_size: Total number of participating ranks.
        group_name: Name under which the rendezvous actor is registered.
        max_retries: How many times non-zero ranks poll for the actor.
        interval_s: Seconds to sleep between polls.

    Returns:
        An initialized ``NcclCommunicator``.

    Raises:
        TimeoutError: If a non-zero rank exhausts ``max_retries`` without
            finding the id store. (Previously the function silently returned
            None here, which surfaced later as an opaque AttributeError.)
    """
    if rank == 0:
        nccl_id = get_unique_id()
        nccl_id_store = NCCLIDStore.options(name=group_name).remote(nccl_id)

        # Ensure the actor is alive and serving the id before peers connect.
        assert ray.get(nccl_id_store.get.remote()) == nccl_id
        return NcclCommunicator(
            ndev=world_size,
            commId=nccl_id,
            rank=0,
        )

    for i in range(max_retries):
        nccl_id_store = get_nccl_id_store_by_name(group_name)
        if nccl_id_store is not None:
            logging.info("nccl_id_store %s got", group_name)
            nccl_id = ray.get(nccl_id_store.get.remote())
            logging.info("nccl id for %s got: %s", group_name, nccl_id)
            return NcclCommunicator(
                ndev=world_size,
                commId=nccl_id,
                rank=rank,
            )
        logging.info("failed to get nccl_id for %d time, sleep for %d seconds", i + 1, interval_s)
        time.sleep(interval_s)

    # Bug fix: previously fell through and implicitly returned None.
    raise TimeoutError(f"rank {rank} failed to rendezvous with {group_name} after {max_retries} retries")
32 | # contents: read 33 | # actions: read 34 | 35 | steps: 36 | - name: "Checkout code" 37 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 38 | with: 39 | persist-credentials: false 40 | 41 | - name: "Run analysis" 42 | uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 43 | with: 44 | results_file: results.sarif 45 | results_format: sarif 46 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 47 | # - you want to enable the Branch-Protection check on a *public* repository, or 48 | # - you are installing Scorecard on a *private* repository 49 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. 50 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 51 | 52 | # Public repositories: 53 | # - Publish results to OpenSSF REST API for easy access by consumers 54 | # - Allows the repository to include the Scorecard badge. 55 | # - See https://github.com/ossf/scorecard-action#publishing-results. 56 | # For private repositories: 57 | # - `publish_results` will always be set to `false`, regardless 58 | # of the value entered here. 59 | publish_results: true 60 | 61 | # Upload the results to GitHub's code scanning dashboard (optional). 62 | # Commenting out will disable upload of results to your repo's Code Scanning dashboard 63 | - name: "Upload to code-scanning" 64 | uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 #v3.28.9 65 | with: 66 | sarif_file: results.sarif 67 | -------------------------------------------------------------------------------- /verl/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
def compute_transformers_input_shapes(batches, meta_info):
    """Pre-compute the hidden-state shape of every micro-batch for each PP stage.

    Args:
        batches: Iterable of model-input dicts with "input_ids" and "attention_mask".
        meta_info: Dict providing "hidden_size" and the "sequence_parallel" flag.

    Returns:
        A list of torch.Size entries, one (total_nnz, 1, hidden_size) per
        micro-batch; under sequence parallelism total_nnz is divided across
        the tensor-parallel group.
    """
    from flash_attn.bert_padding import unpad_input  # flash 2 is a must for Megatron

    hidden_size = meta_info["hidden_size"]
    sequence_parallel = meta_info["sequence_parallel"]
    input_shapes = []
    for model_inputs in batches:
        # Strip padding so the shape reflects only real (non-pad) tokens.
        tokens_rmpad = unpad_input(
            model_inputs["input_ids"].unsqueeze(dim=-1), model_inputs["attention_mask"]
        )[0]  # (total_nnz, 1)
        if sequence_parallel:
            tokens_rmpad = pad_to_sequence_parallel(tokens_rmpad)
            # Tokens are sharded across the tensor-parallel ranks under SP.
            nnz = tokens_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size()
        else:
            nnz = tokens_rmpad.shape[0]
        input_shapes.append(torch.Size([nnz, 1, hidden_size]))
    return input_shapes


def make_batch_generator(batches, vpp_size):
    """Create micro-batch iterator(s) for Megatron pipeline parallelism.

    With virtual pipeline parallelism (vpp_size > 1) every virtual stage needs
    its own iterator over the same micro-batches; otherwise a single iterator
    suffices.

    Args:
        batches: An iterable (e.g., list) of micro-batches.
        vpp_size (int): The virtual pipeline model parallel size.

    Returns:
        A single iterator, or a list of ``vpp_size`` independent iterators.
    """
    if vpp_size > 1:
        # One independent iterator per virtual pipeline chunk.
        return [iter(batches) for _ in range(vpp_size)]
    return iter(batches)
import re

# Only the tail of the solution is searched; GSM8K final answers appear at the end.
_SOLUTION_CLIP_CHARS = 300


def extract_solution(solution_str, method="strict"):
    """Extract the final numeric answer from a GSM8K solution string.

    Args:
        solution_str: The model-generated solution text.
        method: "strict" requires the canonical "#### <number>" format;
            "flexible" falls back to the last number-like token in the text.

    Returns:
        The extracted answer as a string, or None if no valid answer is found.
    """
    assert method in ["strict", "flexible"]

    # Optimization: Regular expression matching on very long strings can be slow.
    # For math problems, the final answer is usually at the end.
    # We only match on the last 300 characters, which is a safe approximation for 300 tokens.
    if len(solution_str) > _SOLUTION_CLIP_CHARS:
        solution_str = solution_str[-_SOLUTION_CLIP_CHARS:]

    if method == "strict":
        # this also tests the formatting of the model
        solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str)
        if len(solutions) == 0:
            final_answer = None
        else:
            # take the last solution
            final_answer = solutions[-1].replace(",", "").replace("$", "")
    elif method == "flexible":
        answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str)
        final_answer = None
        if len(answer) == 0:
            # no reward if there is no answer
            pass
        else:
            invalid_str = ["", "."]
            # find the last number that is not '.'
            # Bug fix: previously the loop variable leaked, so when every
            # candidate was invalid (e.g. "."), an invalid string was returned
            # instead of None.
            for candidate in reversed(answer):
                if candidate not in invalid_str:
                    final_answer = candidate
                    break
    return final_answer


def compute_score(solution_str, ground_truth, method="strict", format_score=0.0, score=1.0):
    """The scoring function for GSM8k.

    Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual
    Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.

    Args:
        solution_str: the solution text
        ground_truth: the ground truth
        method: the method to extract the solution, choices are 'strict' and 'flexible'
        format_score: the score for the format
        score: the score for the correct answer

    Returns:
        ``score`` for a correct answer, ``format_score`` for an extracted but
        wrong answer, and 0 when no answer could be extracted.
    """
    answer = extract_solution(solution_str=solution_str, method=method)
    if answer is None:
        return 0
    if answer == ground_truth:
        return score
    return format_score
lora_alpha: 16 # LoRA scaling factor 41 | target_modules: all-linear # Target modules for LoRA adaptation 42 | use_liger: False 43 | strategy: fsdp2 44 | optim: 45 | lr: 1e-5 46 | betas: [0.9, 0.95] 47 | weight_decay: 0.01 48 | warmup_steps_ratio: 0.1 49 | clip_grad: 1.0 50 | lr_scheduler: cosine 51 | ulysses_sequence_parallel_size: 1 52 | use_remove_padding: False 53 | trainer: 54 | default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name} 55 | default_hdfs_dir: null 56 | project_name: gsm8k-sft 57 | experiment_name: test 58 | total_epochs: 4 59 | total_training_steps: null 60 | logger: [ 'console', 'wandb' ] 61 | seed: 1 62 | save_freq: -1 63 | test_freq: -1 64 | nnodes: 1 65 | n_gpus_per_node: 8 66 | max_ckpt_to_keep: null # Maximum number of checkpoints to keep, set to null to keep all 67 | 68 | # Resume mode: "auto", "disable", or "resume_path" 69 | # "auto": resume from last checkpoint if available 70 | # "disable": start from scratch 71 | # "resume_path": resume from a user-defined path 72 | resume_mode: auto 73 | 74 | # Path to resume training from (used when resume_mode is "resume_path" or "auto") 75 | resume_from_path: null 76 | 77 | # Checkpoint configuration 78 | checkpoint: 79 | # What to include in saved checkpoints 80 | # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space 81 | save_contents: ["model", "optimizer", "extra"] 82 | 83 | # For more flexibility, you can specify the contents to load from the checkpoint. 84 | load_contents: ${trainer.checkpoint.save_contents} 85 | device: cuda 86 | -------------------------------------------------------------------------------- /verl/verl/utils/device.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # This code is inspired by the torchtune. 
import logging
from types import ModuleType

import torch

logger = logging.getLogger(__name__)


def is_torch_npu_available() -> bool:
    """Check the availability of NPU (requires the optional torch_npu package)."""
    try:
        import torch_npu  # noqa: F401

        return torch.npu.is_available()
    except ImportError:
        return False


# Cached once at import time; device availability does not change within a process.
is_cuda_available = torch.cuda.is_available()
is_npu_available = is_torch_npu_available()


def get_visible_devices_keyword() -> str:
    """Function that gets visible devices keyword name.
    Returns:
        'CUDA_VISIBLE_DEVICES' or `ASCEND_RT_VISIBLE_DEVICES`
    """
    return "CUDA_VISIBLE_DEVICES" if is_cuda_available else "ASCEND_RT_VISIBLE_DEVICES"


def get_device_name() -> str:
    """Function that gets the torch.device based on the current machine.
    This currently only supports CPU, CUDA, NPU.
    Returns:
        device
    """
    if is_cuda_available:
        device = "cuda"
    elif is_npu_available:
        device = "npu"
    else:
        device = "cpu"
    return device


def get_torch_device() -> ModuleType:
    """Return the corresponding torch attribute based on the device type string.

    Bug fix: the return annotation previously used the builtin ``any`` function
    instead of an actual type; the value returned is a torch sub-module.

    Returns:
        module: The corresponding torch device namespace, or torch.cuda if not found.
    """
    device_name = get_device_name()
    try:
        return getattr(torch, device_name)
    except AttributeError:
        logger.warning(f"Device namespace '{device_name}' not found in torch, try to load torch.cuda.")
        return torch.cuda


def get_device_id() -> int:
    """Return current device id based on the device type.
    Returns:
        device index
    """
    return get_torch_device().current_device()


def get_nccl_backend() -> str:
    """Return nccl backend type based on the device type.
    Returns:
        nccl backend type string.
    Raises:
        RuntimeError: if neither CUDA nor NPU is available.
    """
    if is_cuda_available:
        return "nccl"
    elif is_npu_available:
        return "hccl"
    else:
        raise RuntimeError(f"No available nccl backend found on device type {get_device_name()}.")


def set_expandable_segments(enable: bool) -> None:
    """Enable or disable expandable segments for cuda.

    NOTE: relies on a private CUDA allocator API; silently a no-op on
    non-CUDA devices.

    Args:
        enable (bool): Whether to enable expandable segments. Used to avoid OOM.
    """
    if is_cuda_available:
        torch.cuda.memory._set_allocator_settings(f"expandable_segments:{enable}")
from dataclasses import is_dataclass
from typing import Any, Optional

from omegaconf import DictConfig, ListConfig, OmegaConf

__all__ = ["omega_conf_to_dataclass"]


def omega_conf_to_dataclass(config: DictConfig | dict, dataclass_type: Optional[type[Any]] = None) -> Any:
    """
    Convert an OmegaConf DictConfig to a dataclass.

    Args:
        config: The OmegaConf DictConfig or dict to convert.
        dataclass_type: The dataclass type to convert to. When dataclass_type is None,
            the DictConfig must contain _target_ to be instantiated via hydra.instantiate API.

    Returns:
        The dataclass instance.
    """
    # Got an empty config: None when no target type is given, otherwise a
    # default-constructed instance of the requested dataclass.
    if not config:
        return dataclass_type if dataclass_type is None else dataclass_type()
    # Got an object that is not config-like (e.g. already an instantiated
    # dataclass): pass it through unchanged.
    if not isinstance(config, DictConfig | ListConfig | dict | list):
        return config

    if dataclass_type is None:
        assert "_target_" in config, (
            "When dataclass_type is not provided, config must contain _target_. "
            "See trainer/config/ppo_trainer.yaml algorithm section for an example. "
            f"Got config: {config}"
        )
        # Hydra resolves _target_ recursively; _convert_="partial" converts
        # untyped containers to plain dict/list while keeping structured ones.
        from hydra.utils import instantiate

        return instantiate(config, _convert_="partial")

    if not is_dataclass(dataclass_type):
        raise ValueError(f"{dataclass_type} must be a dataclass")
    cfg = OmegaConf.create(config)  # in case it's a dict
    # pop _target_ to avoid hydra instantiate error, as most dataclass do not have _target_
    # Updated (vermouth1992) We add _target_ to BaseConfig so that it is compatible.
    # Otherwise, this code path can't support recursive instantiation.
    # if "_target_" in cfg:
    #     cfg.pop("_target_")
    cfg_from_dataclass = OmegaConf.structured(dataclass_type)
    # let cfg override the existing vals in `cfg_from_dataclass`
    cfg_merged = OmegaConf.merge(cfg_from_dataclass, cfg)
    # now convert to `dataclass_type`
    config_object = OmegaConf.to_object(cfg_merged)
    return config_object


def update_dict_with_config(dictionary: dict, config: DictConfig):
    """Overwrite entries of ``dictionary`` in place with same-named attributes from ``config``.

    Only keys already present in ``dictionary`` are considered; keys absent
    from ``config`` are left untouched. Mutates ``dictionary``; returns None.
    """
    for key in dictionary:
        if hasattr(config, key):
            dictionary[key] = getattr(config, key)
"""
Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT
"""

from torch.distributed.device_mesh import DeviceMesh

from verl import DataProto
from verl.protocol import all_gather_data_proto
from verl.utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group

from .base import BaseShardingManager


class FSDPUlyssesShardingManager(BaseShardingManager):
    """Sharding manager supporting data resharding for FSDP + Ulysses sequence parallelism."""

    def __init__(self, device_mesh: DeviceMesh):
        super().__init__()
        self.device_mesh = device_mesh
        self.seed_offset = 12345

    def __enter__(self):
        if self.device_mesh is None:
            return
        # A global SP group may be active; swap in this model's own SP group,
        # remembering the previous one so __exit__ can restore it.
        self.prev_sp_group = get_ulysses_sequence_parallel_group()
        set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group())
        # TODO: check how to set seed for each model

    def __exit__(self, exc_type, exc_value, traceback):
        if self.device_mesh is None:
            return
        # Restore whichever SP group was active before entering this context.
        set_ulysses_sequence_parallel_group(self.prev_sp_group)
        # TODO: check how to set seed for each model

    def preprocess_data(self, data: DataProto) -> DataProto:
        """All-gather ``data`` across the sequence-parallel group.

        The batch is first sharded along the FSDP dimension (DP_COMPUTE), so
        every rank inside an SP group must hold the same data before Ulysses
        sequence parallelism runs.
        """
        if self.device_mesh is not None:
            sp_group = self.device_mesh["sp"].get_group()
            all_gather_data_proto(data=data, process_group=sp_group)
        return data

    def postprocess_data(self, data: DataProto) -> DataProto:
        """Split ``data`` back to this rank's slice of the FSDP partition."""
        if self.device_mesh is not None:
            sp_mesh = self.device_mesh["sp"]
            data = data.chunk(chunks=sp_mesh.size())[sp_mesh.get_local_rank()]
        return data
17 | 18 | """ 19 | 20 | from collections import defaultdict 21 | 22 | import hydra 23 | import numpy as np 24 | import pandas as pd 25 | import ray 26 | from omegaconf import OmegaConf 27 | from tqdm import tqdm 28 | 29 | from verl.trainer.ppo.reward import get_custom_reward_fn 30 | from verl.utils.fs import copy_to_local 31 | 32 | 33 | @ray.remote 34 | def process_item(reward_fn, data_source, response_lst, reward_data): 35 | ground_truth = reward_data["ground_truth"] 36 | score_lst = [reward_fn(data_source, r, ground_truth) for r in response_lst] 37 | return data_source, np.mean(score_lst) 38 | 39 | 40 | @hydra.main(config_path="config", config_name="evaluation", version_base=None) 41 | def main(config): 42 | local_path = copy_to_local(config.data.path, use_shm=config.data.get("use_shm", False)) 43 | dataset = pd.read_parquet(local_path) 44 | responses = dataset[config.data.response_key] 45 | data_sources = dataset[config.data.data_source_key] 46 | reward_model_data = dataset[config.data.reward_model_key] 47 | 48 | total = len(dataset) 49 | 50 | # Initialize Ray 51 | if not ray.is_initialized(): 52 | ray.init(**OmegaConf.to_container(config.ray_kwargs.get("ray_init", {}))) 53 | 54 | # evaluate test_score based on data source 55 | data_source_reward = defaultdict(list) 56 | compute_score = get_custom_reward_fn(config) 57 | 58 | # Create remote tasks 59 | remote_tasks = [ 60 | process_item.remote(compute_score, data_sources[i], responses[i], reward_model_data[i]) for i in range(total) 61 | ] 62 | 63 | # Process results as they come in 64 | with tqdm(total=total) as pbar: 65 | while len(remote_tasks) > 0: 66 | # Use ray.wait to get completed tasks 67 | done_ids, remote_tasks = ray.wait(remote_tasks) 68 | for result_id in done_ids: 69 | data_source, score = ray.get(result_id) 70 | data_source_reward[data_source].append(score) 71 | pbar.update(1) 72 | 73 | metric_dict = {} 74 | for data_source, rewards in data_source_reward.items(): 75 | 
metric_dict[f"test_score/{data_source}"] = np.mean(rewards) 76 | 77 | print(metric_dict) 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | --------------------------------------------------------------------------------