├── sandbox ├── __init__.py └── README.md ├── verl ├── version │ └── version ├── trainer │ ├── runtime_env.yaml │ ├── config │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ └── sft_trainer.yaml │ ├── __init__.py │ └── ppo │ │ └── __init__.py ├── models │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── checkpoint_utils │ │ │ └── __init__.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── __init__.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── checkpoint_utils │ │ │ └── __init__.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── __init__.py │ ├── transformers │ │ └── __init__.py │ └── README.md ├── utils │ ├── checkpoint │ │ └── __init__.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ └── sequence_parallel.py │ ├── rendezvous │ │ └── __init__.py │ ├── debug │ │ ├── __init__.py │ │ └── performance.py │ ├── __init__.py │ ├── dataset │ │ ├── __init__.py │ │ └── README.md │ ├── config.py │ ├── logging_utils.py │ ├── distributed.py │ ├── reward_score │ │ ├── geo3k.py │ │ └── math_verify.py │ ├── ray_utils.py │ └── py_functional.py ├── workers │ ├── __init__.py │ ├── rollout │ │ ├── sglang_rollout │ │ │ └── __init__.py │ │ ├── naive │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── base.py │ │ └── vllm_rollout │ │ │ └── __init__.py │ ├── reward_model │ │ ├── __init__.py │ │ ├── megatron │ │ │ └── __init__.py │ │ └── base.py │ ├── reward_manager │ │ └── __init__.py │ ├── actor │ │ └── __init__.py │ ├── critic │ │ ├── __init__.py │ │ └── base.py │ └── sharding_manager │ │ └── base.py ├── third_party │ ├── __init__.py │ ├── vllm │ │ ├── vllm_v_0_3_1 │ │ │ └── __init__.py │ │ ├── vllm_v_0_4_2 │ │ │ └── __init__.py │ │ ├── vllm_v_0_5_4 │ │ │ ├── __init__.py │ │ │ └── hf_weight_loader.py │ │ └── vllm_v_0_6_3 │ │ │ ├── __init__.py │ │ │ ├── tokenizer.py │ │ │ └── 
hf_weight_loader.py │ └── sglang │ │ └── __init__.py ├── single_controller │ ├── base │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ └── worker.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ └── __init__.py │ ├── ray │ │ └── __init__.py │ └── __init__.py └── __init__.py ├── Notice.txt ├── docs ├── _static │ └── logo.png ├── requirements-docs.txt ├── README.md ├── advance │ ├── placement.rst │ └── megatron_extension.rst ├── Makefile └── README_vllm0.8.md ├── datasets ├── deepscaler │ ├── aime.parquet │ ├── aime25.parquet │ └── train.parquet └── simplelr_math_35 │ ├── test.parquet │ └── train.parquet ├── .style.yapf ├── recipe └── simpletir │ ├── assets │ ├── simpletir_curve.png │ ├── simpletir_example.jpg │ └── simpletir_overview.png │ ├── workers │ └── reward_manager │ │ └── __init__.py │ └── utils │ └── reward_score │ └── __init__.py ├── scripts ├── format.sh └── model_merger.sh ├── tests ├── ray │ ├── detached_worker │ │ ├── run.sh │ │ └── README.md │ ├── test_check_worker_alive.py │ ├── test_rvdz.py │ └── test_ray_local_envs.py ├── e2e │ ├── arithmetic_sequence │ │ ├── data │ │ │ ├── test.parquet │ │ │ ├── train.parquet │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── model.safetensors │ │ │ ├── generation_config.json │ │ │ ├── tokenizer_config.json │ │ │ └── config.json │ │ └── rl │ │ │ └── README.md │ ├── __init__.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ └── __init__.py │ ├── run_ray_trainer_rmpad.sh │ ├── check_custom_rwd_fn.py │ ├── run_ray_trainer.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_ray_trainer_fire_sampling.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_megatron.sh │ ├── check_results.py │ ├── run_qwen_grpo.sh │ └── run_qwen2vl_geo3k_function_rm.sh ├── __init__.py ├── distributed │ └── run_all.sh ├── sft │ ├── run_sft.sh │ ├── run_sft_sp_loss_match.sh │ ├── run_sft_qwen05_sp2_liger.sh │ └── 
run_sft_qwen05_peft.sh ├── sanity │ ├── test_import.py │ └── check_license.py ├── generation │ └── run_gen_qwen05.sh ├── kill_github_tests.sh ├── verl │ └── utils │ │ └── dataset │ │ └── test_rm_dataset.py └── gpu_utility │ └── test_ops.py ├── .github ├── dependabot.yml └── workflows │ ├── secrets_scan.yml │ ├── pylint.yml │ ├── e2e_digit_completion_fire.yml │ ├── sanity.yml │ ├── sandbox.yml │ ├── e2e_gsm8k_prime.yml │ ├── ray_test.yml │ ├── e2e_ascend.yml │ ├── e2e_vlm_geo3k.yml │ ├── e2e_digit_completion.yml │ ├── yapf_format.yml │ ├── e2e_lora.yml │ └── e2e_sglang_gsm8k.yml ├── requirements.txt ├── requirements_sglang.txt ├── .readthedocs.yaml ├── docker ├── Dockerfile.megatron ├── Dockerfile.rocm ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.vemlp.vllm.te └── Dockerfile.ngc.vllm ├── examples ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── sft │ └── gsm8k │ │ ├── run_gemma_7b.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_deepseek_6b7.sh │ │ ├── run_qwen_05_sp2.sh │ │ ├── run_qwen_05_sp2_liger.sh │ │ └── run_qwen_05_peft.sh ├── split_placement │ └── run_deepseek7b_llm.sh ├── ppo_trainer │ ├── run_gemma.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek7b_llm_modelscope.sh │ └── run_deepseek7b_llm_sp2.sh ├── grpo_trainer │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_deepseek7b_llm.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_deepseek7b_llm_megatron.sh │ └── run_qwen2-7b_megatron.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh └── rloo_trainer │ └── run_qwen2-7b.sh └── .gitignore /sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl/version/version: 
-------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /datasets/deepscaler/aime.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/datasets/deepscaler/aime.parquet -------------------------------------------------------------------------------- /datasets/deepscaler/aime25.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/datasets/deepscaler/aime25.parquet -------------------------------------------------------------------------------- /datasets/deepscaler/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/datasets/deepscaler/train.parquet -------------------------------------------------------------------------------- /datasets/simplelr_math_35/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/datasets/simplelr_math_35/test.parquet -------------------------------------------------------------------------------- /datasets/simplelr_math_35/train.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/datasets/simplelr_math_35/train.parquet -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /recipe/simpletir/assets/simpletir_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/recipe/simpletir/assets/simpletir_curve.png -------------------------------------------------------------------------------- /recipe/simpletir/assets/simpletir_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/recipe/simpletir/assets/simpletir_example.jpg -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests examples recipe 4 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /recipe/simpletir/assets/simpletir_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/recipe/simpletir/assets/simpletir_overview.png 
-------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/tests/e2e/arithmetic_sequence/data/test.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/tests/e2e/arithmetic_sequence/data/train.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/SimpleTIR/HEAD/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | 
reward_model_key: reward_model -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | ## Enabled the dependabot to check the dependencies of the project 2 | ## Dependabot will open pull requests to update dependencies automatically 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: pip 7 | directory: "/" 8 | schedule: 9 | interval: weekly -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown suport 2 | recommonmark 3 | # markdown table suport 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /recipe/simpletir/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | from recipe.simpletir.workers.reward_manager.code import CodeRewardManager 2 | from recipe.simpletir.workers.reward_manager.math_verify import MathRewardManager 3 | from recipe.simpletir.workers.reward_manager.math_verify_with_exec import ( 4 | MathRewardExecManager, 5 | ) 6 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | 
-------------------------------------------------------------------------------- /scripts/model_merger.sh: -------------------------------------------------------------------------------- 1 | HF_MODEL_DIR=... 2 | CHECKPOINT_DIR=... 3 | STEP=... 4 | TARGET_DIR=... 5 | 6 | python scripts/model_merger.py \ 7 | --backend fsdp \ 8 | --hf_model_path $HF_MODEL_DIR \ 9 | --local_dir $CHECKPOINT_DIR/global_step_$STEP/actor \ 10 | --target_dir $TARGET_DIR \ 11 | 12 | cp $HF_MODEL_DIR/tokenizer* $TARGET_DIR 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | peft 12 | pyarrow>=15.0.0 13 | pybind11 14 | pylatexenc 15 | pylint==3.3.6 16 | ray[default] 17 | tensordict<=0.6.2 18 | torchdata 19 | transformers 20 | # vllm==0.6.3.post1 21 | wandb 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and navigate to http://localhost:8000 to view the documentation. 
-------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy 9 | pandas 10 | peft 11 | pyarrow>=15.0.0 12 | pybind11 13 | pylatexenc 14 | ray[default]>=2.10 15 | tensordict<=0.6.2 16 | torchdata 17 | torchvision 18 | transformers 19 | wandb 20 | sglang[all]==0.4.4.post3 21 | torch-memory-saver>=0.0.5 -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . 
-------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /.github/workflows/secrets_scan.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 16 | with: 17 | fetch-depth: 0 18 | - name: Secret Scanning 19 | uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14 20 | with: 21 | extra_args: --results=verified,unknown -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | from .sglang_rollout import SGLangRollout 15 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_tokenizer, hf_processor 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=2 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path\ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=16 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 
10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path \ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=2 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | 
trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | 18 | __all__ = ['Worker', 'WorkerGroup', 'ClassWithInitArgs', 'ResourcePool'] -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 6 | 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 8 | algorithm.adv_estimator=gae \ 9 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 10 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 11 | actor_rollout_ref.actor.use_kl_loss=False \ 12 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 13 | actor_rollout_ref.rollout.name=vllm \ 14 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 15 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 16 | critic.model.path=Qwen/Qwen2.5-0.5B \ 17 | critic.model.use_remove_padding=True \ 18 | algorithm.use_kl_in_reward=False \ 19 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /sandbox/README.md: -------------------------------------------------------------------------------- 1 | # Local sandbox setup with firejail 2 | 3 | Install: 4 | 5 | ```bash 6 | sudo apt-get update && sudo apt-get install -y firejail 7 | pip install "fastapi[all]" uvicorn 8 | ``` 9 | 10 | Run: 11 | 12 | ```bash 13 | cd sandbox 14 | uvicorn sandbox_api:app --host 127.0.0.1 --port 12345 --workers 4 15 | ``` 16 | 17 | Test: 18 | 19 | ```bash 20 | # test code exec 21 | curl -X POST http://127.0.0.1:12345/faas/sandbox/ -H 'Content-Type: 
application/json' -d '{"code":"print(1+1)","language":"python","compile_timeout":1.0,"run_timeout":3.0}' 22 | # test stdin 23 | curl -X POST http://127.0.0.1:12345/faas/sandbox/ -H 'Content-Type: application/json' -d '{"code":"name = input(\"Your name:\"); print(f\"Hi, {name}!\")","stdin":"Alice","language":"python","compile_timeout":1.0,"run_timeout":3.0}' 24 | # test via python code 25 | SANDBOX_ENDPOINT=http://127.0.0.1:12345/faas/sandbox/ python local_sandbox.py 26 | ``` 27 | -------------------------------------------------------------------------------- /tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def update_dict_with_config(dictionary: Dict, config: DictConfig):
    """Overwrite values of *dictionary* in place from same-named attributes of *config*.

    Only keys already present in *dictionary* are considered; keys with no
    matching attribute on *config* keep their current values, and no new
    keys are ever added.
    """
    overrides = {name: getattr(config, name) for name in dictionary if hasattr(config, name)}
    dictionary.update(overrides)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
import os

# Directory containing this package's __init__.py. Note: the original code
# wrapped the path in a single-argument os.path.join(), which is a no-op;
# os.path.dirname(os.path.abspath(__file__)) is the direct form.
version_folder = os.path.dirname(os.path.abspath(__file__))

# Note(haibin.lin): single_controller.__version__ is deprecated
# Read the shared version string from the sibling `version/version` file one
# level above this package (os.path.join is variadic, so no nesting needed).
with open(os.path.join(version_folder, os.pardir, 'version/version')) as f:
    __version__ = f.read().strip()

from . import base
from .base import *

__all__ = base.__all__
trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import ray


@ray.remote
class WorkerGroupRegisterCenter:
    """Ray actor that holds bootstrap information published by rank 0 of a
    worker group so that the other workers can fetch it.

    The payload is treated as opaque here; presumably it carries rendezvous
    details such as the master address/port — confirm against callers.
    """

    def __init__(self, rank_zero_info):
        # Store the rank-0 payload as-is; no validation is performed.
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the information registered by rank 0."""
        return self.rank_zero_info


def create_worker_group_register_center(name, info):
    """Create a named ``WorkerGroupRegisterCenter`` actor holding *info*.

    Giving the actor a name makes it discoverable by other processes
    (e.g. via ``ray.get_actor(name)``).
    """
    return WorkerGroupRegisterCenter.options(name=name).remote(info)
Default tensor parallel size to 2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct 17 | 18 | python3 -m verl.trainer.main_generation \ 19 | trainer.nnodes=1 \ 20 | trainer.n_gpus_per_node=$nproc_per_node \ 21 | data.path=$HOME/data/gsm8k/test.parquet \ 22 | data.prompt_key=prompt \ 23 | data.n_samples=1 \ 24 | data.output_path=$save_path \ 25 | model.path=$HOME/models/Qwen/Qwen2.5-0.5B-Instruct \ 26 | +model.trust_remote_code=True \ 27 | rollout.temperature=1.0 \ 28 | rollout.top_k=50 \ 29 | rollout.top_p=0.7 \ 30 | rollout.prompt_length=2048 \ 31 | rollout.response_length=1024 \ 32 | rollout.tensor_model_parallel_size=$infer_tp \ 33 | rollout.gpu_memory_utilization=0.8 34 | -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 
| exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | import torch 18 | 19 | 20 | def set_basic_config(level): 21 | """ 22 | This function sets the global logging format and level. 
It will be called when import verl 23 | """ 24 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 25 | 26 | 27 | def log_to_file(string): 28 | print(string) 29 | if os.path.isdir('logs'): 30 | with open(f'logs/log_{torch.distributed.get_rank()}', 'a+') as f: 31 | f.write(string + '\n') 32 | -------------------------------------------------------------------------------- /docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 7 | 8 | # Set working directory 9 | # WORKDIR $PWD/app 10 | 11 | # Set environment variables 12 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 13 | 14 | # Install vllm 15 | RUN pip uninstall -y vllm && \ 16 | rm -rf vllm && \ 17 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 18 | cd vllm && \ 19 | MAX_JOBS=$(nproc) python3 setup.py install && \ 20 | cd .. && \ 21 | rm -rf vllm 22 | 23 | # Copy the entire project directory 24 | COPY . . 25 | 26 | # Install dependencies 27 | RUN pip install "tensordict<0.6" --no-deps && \ 28 | pip install accelerate \ 29 | codetiming \ 30 | datasets \ 31 | dill \ 32 | hydra-core \ 33 | liger-kernel \ 34 | numpy \ 35 | pandas \ 36 | peft \ 37 | "pyarrow>=15.0.0" \ 38 | pylatexenc \ 39 | "ray[data,train,tune,serve]" \ 40 | torchdata \ 41 | transformers \ 42 | wandb \ 43 | orjson \ 44 | pybind11 && \ 45 | pip install -e . 
--no-deps -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def initialize_global_process_group(timeout_second=36000):
    """Initialize the global NCCL process group from torchrun environment variables.

    Args:
        timeout_second: collective-communication timeout, in seconds.

    Returns:
        Tuple ``(local_rank, rank, world_size)`` parsed from the
        ``LOCAL_RANK`` / ``RANK`` / ``WORLD_SIZE`` environment variables
        (raises ``KeyError`` if any is missing).
    """
    import torch.distributed
    from datetime import timedelta
    torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second))
    local_rank = int(os.environ["LOCAL_RANK"])
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])

    # init_process_group either succeeds or raises, so the group is guaranteed
    # to be initialized here; the previous `is_initialized()` guard was always
    # true. Bind this process to its local GPU unconditionally.
    torch.cuda.set_device(local_rank)
    return local_rank, rank, world_size
14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | 
-------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 
31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
import argparse


def check_congratulations_in_file(output_file):
    """Assert that the custom-reward-function success banner appears in *output_file*."""
    success_message = "Congratulations!!! You have called my_reward_function successfully!!!"
    with open(output_file, 'r') as f:
        contents = f.read()
    assert success_message in contents, f'Success message of my_reward_function not found in {output_file}'
    print("Check passes")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_file', required=True, type=str)
    args = parser.parse_args()
    check_congratulations_in_file(args.output_file)
def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
    """Log current CUDA memory usage (allocated/reserved, in GiB), prefixed with *head*.

    Emits only on the given *rank*; emits on every rank when *rank* is None
    or when torch.distributed is not initialized. Prints to stdout when
    *logger* is None, otherwise logs the message at *level*.
    """
    should_emit = (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank)
    if not should_emit:
        return

    gib = 1024**3
    allocated = torch.cuda.memory_allocated() / gib
    reserved = torch.cuda.memory_reserved() / gib
    message = f'{head}, memory allocated (GB): {allocated}, memory reserved (GB): {reserved}'

    if logger is None:
        print(message)
    else:
        logger.log(msg=message, level=level)
31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs." 42 | -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def format_reward(predict_str: str) -> float:
    """Return 1.0 when the prediction contains a ``\\boxed{...}`` answer, else 0.0."""
    boxed_pattern = re.compile(r'.*.*\\boxed\{.*\}.*', re.DOTALL)
    return 1.0 if boxed_pattern.fullmatch(predict_str) else 0.0


def acc_reward(predict_str: str, ground_truth: str) -> float:
    """Return 1.0 when the boxed answer matches the ground truth, else 0.0."""
    extracted_answer = extract_boxed_content(predict_str)
    return float(bool(grade_answer(extracted_answer, ground_truth)))


def compute_score(predict_str: str, ground_truth: str) -> float:
    """Weighted reward: 90% answer accuracy, 10% format compliance."""
    accuracy = acc_reward(predict_str, ground_truth)
    formatting = format_reward(predict_str)
    return 0.9 * accuracy + 0.1 * formatting
18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | enforce_eager: True 27 | free_cache_engine: True 28 | load_format: dummy_dtensor 29 | tensor_model_parallel_size: 1 30 | max_num_batched_tokens: 8192 31 | max_model_len: null 32 | max_num_seqs: 1024 33 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 34 | log_prob_micro_batch_size_per_gpu: 8 35 | # for fire vllm rollout 36 | use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236 37 | # for hf rollout 38 | do_sample: True 39 | disable_log_stats: True 40 | enable_chunked_prefill: True 41 | n: 1 42 | actor: 43 | strategy: fsdp # This is for backward-compatibility 44 | ulysses_sequence_parallel_size: 1 # sp size 45 | fsdp_config: 46 | fsdp_size: -1 
-------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as below: 4 | 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be modulo with max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is defined in verl/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory) 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | 
micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | prompt_key: question 8 | response_key: answer 9 | max_length: 1024 10 | truncation: error 11 | balance_dp_token: False 12 | chat_template: null 13 | custom_cls: 14 | path: null 15 | name: null 16 | model: 17 | partial_pretrain: ~/models/gemma-1.1-7b-it 18 | fsdp_config: 19 | wrap_policy: 20 | min_num_params: 0 21 | cpu_offload: False 22 | offload_params: False 23 | external_lib: null 24 | enable_gradient_checkpointing: False 25 | trust_remote_code: False 26 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 27 | lora_alpha: 16 # LoRA scaling factor 28 | target_modules: all-linear # Target modules for LoRA adaptation 29 | use_liger: False 30 | optim: 31 | lr: 1e-5 32 | betas: [0.9, 0.95] 33 | weight_decay: 0.01 34 | warmup_steps_ratio: 0.1 35 | clip_grad: 1.0 36 | ulysses_sequence_parallel_size: 1 37 | use_remove_padding: False 38 | trainer: 39 | default_local_dir: /tmp/sft_model 40 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 41 | resume_path: null 42 | project_name: gsm8k-sft 43 | experiment_name: test 44 | total_epochs: 4 45 | total_training_steps: null 46 | logger: ['console'] 47 | seed: 1 48 | 49 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def compute_score(model_output: str, ground_truth: str) -> float:
    """Score *model_output* against *ground_truth* using Math-Verify.

    Args:
        model_output: raw model completion containing the predicted answer.
        ground_truth: reference answer (without ``\\boxed{}`` wrapping).

    Returns:
        A score in [0, 1]; 0 when extraction/verification fails.
        (Fixed: the previous ``-> bool`` annotation was wrong — math_metric
        yields a float score.)
    """
    verify_func = math_metric(
        gold_extraction_target=(LatexExtractionConfig(),),
        pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
    )
    ret_score = 0.

    # Wrap the ground truth in \boxed{} format for verification
    ground_truth_boxed = "\\boxed{" + ground_truth + "}"
    try:
        ret_score, _ = verify_func([ground_truth_boxed], [model_output])
    except Exception:
        # Math-Verify can raise on malformed/unparseable input; treat as 0.
        pass

    return ret_score
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# Directory containing this file; the version string lives in version/version.
# (os.path.join with a single argument was a no-op and has been dropped.)
version_folder = os.path.dirname(os.path.abspath(__file__))

with open(os.path.join(version_folder, 'version/version')) as f:
    __version__ = f.read().strip()

from .protocol import DataProto

from .utils.logging_utils import set_basic_config
import logging

set_basic_config(level=logging.WARNING)

from . import single_controller

__all__ = ['DataProto', "__version__"]

if os.getenv('VERL_USE_MODELSCOPE', 'False').lower() == 'true':
    # Import the submodule explicitly: `import importlib` alone does not
    # guarantee that `importlib.util` is bound.
    import importlib.util
    if importlib.util.find_spec("modelscope") is None:
        raise ImportError('You are using the modelscope hub, please install modelscope by `pip install modelscope -U`')
    # Patch hub to download models from modelscope to speed up.
    from modelscope.utils.hf_util import patch_hub
    patch_hub()
21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | e2e_digit_completion: 26 | runs-on: [self-hosted, l20-1] 27 | timeout-minutes: 20 # Increase this timeout value as needed 28 | env: 29 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 30 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 31 | NO_PROXY: "localhost,127.0.0.1" 32 | HF_HUB_ENABLE_HF_TRANSFER: 1 33 | container: 34 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 35 | options: --gpus all --shm-size=10g 36 | steps: 37 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 38 | with: 39 | fetch-depth: 0 40 | - name: Install the current repository 41 | run: | 42 | pip3 install hf_transfer 43 | pip3 install -e .[test] 44 | - name: Running digit completon e2e training tests on 8 L20 GPUs 45 | run: | 46 | ray stop --force 47 | bash tests/e2e/run_ray_trainer_fire_sampling.sh 48 | -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | A Ray logger will receive logging info from different processes. 
def concat_dict_to_str(dict: Dict, step):
    """Render the numeric entries of *dict* as one ``step:N - k:v`` line."""
    parts = [f'step:{step}']
    parts.extend(f'{key}:{value:.3f}' for key, value in dict.items() if isinstance(value, numbers.Number))
    return ' - '.join(parts)


class LocalLogger:
    """Minimal in-process logger that optionally echoes metric dicts to stdout."""

    def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
        # remote_logger / enable_wandb are accepted for API compatibility but unused.
        self.print_to_console = print_to_console
        if print_to_console:
            print('Using LocalLogger is deprecated. The constructor API will change ')

    def flush(self):
        """No-op: nothing is buffered."""
        pass

    def log(self, data, step):
        """Print the metrics in *data* for *step* when console output is enabled."""
        if self.print_to_console:
            print(concat_dict_to_str(data, step=step), flush=True)
def parallel_put(data_list, max_workers=None):
    """Call ``ray.put`` on every element of *data_list* concurrently.

    Args:
        data_list: items to place into the Ray object store.
        max_workers: thread-pool size; defaults to ``min(len(data_list), 16)``.

    Returns:
        List of object refs in the same order as *data_list*.
    """

    def put_data(index, data):
        return index, ray.put(data)

    # Bug fix: an empty input previously produced max_workers == 0, and
    # ThreadPoolExecutor(max_workers=0) raises ValueError.
    if not data_list:
        return []

    if max_workers is None:
        max_workers = min(len(data_list), 16)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)]
        res_lst = []
        for future in concurrent.futures.as_completed(data_list_f):
            res_lst.append(future.result())

        # reorder based on index so the output aligns with the input order
        output = [None for _ in range(len(data_list))]
        for res in res_lst:
            index, data_ref = res
            output[index] = data_ref

    return output
def get_rm_data():
    """Ensure the RM test parquet exists locally and return its path."""
    # prepare test dataset
    url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet"
    local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/')
    local_path = os.path.join(local_folder, 'test.parquet')
    os.makedirs(local_folder, exist_ok=True)
    # Bug fix: `url` was previously unused, so the file was never fetched and
    # RMDataset failed on a missing parquet on a clean machine.
    if not os.path.exists(local_path):
        import urllib.request
        urllib.request.urlretrieve(url, local_path)
    return local_path


def test_rm_dataset():
    """Smoke-test RMDataset: the first sample decodes into non-empty strings."""
    tokenizer = hf_tokenizer("facebook/opt-1.3b")
    local_path = get_rm_data()
    dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
    data = dataset[0]['input_ids']
    output = tokenizer.batch_decode(data)
    assert len(output) > 1
    # isinstance instead of `type(...) == str` (idiomatic type check)
    assert isinstance(output[0], str)
18 | 19 | Returns: 20 | Union[float, Dict[str, Any]]: Either a float score or a dictionary with 'score' and optional 'extra_info' 21 | """ 22 | if "simplelr_math_35" in data_source or "deepscaler" in data_source: 23 | from . import hf_math_verify 24 | 25 | res = hf_math_verify.compute_score(solution_str, ground_truth) 26 | elif "code" in data_source or "LeetCodeDataset" in data_source: 27 | from . import code 28 | 29 | res = code.compute_score(solution_str, ground_truth, extra_info=extra_info) 30 | else: 31 | raise ValueError(f"Unknown data source: {data_source}") 32 | 33 | if isinstance(res, (int, float, bool)): 34 | return float(res) 35 | elif isinstance(res, dict): 36 | return res 37 | else: 38 | return float(res[0]) 39 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.max_prompt_length=16 \ 16 | data.max_response_length=32 \ 17 | data.return_raw_input_ids=True \ 18 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 19 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 20 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.actor.optim.lr=1e-4 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 25 | actor_rollout_ref.rollout.name=hf \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 27 | 
critic.ppo_micro_batch_size_per_gpu=200 \ 28 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 29 | critic.optim.lr=1e-3 \ 30 | algorithm.use_kl_in_reward=False \ 31 | trainer.total_epochs=200 \ 32 | trainer.experiment_name=arithmetic_sequences \ 33 | trainer.logger=['console'] \ 34 | trainer.n_gpus_per_node=1 \ 35 | trainer.test_freq=1 \ 36 | trainer.save_freq=110 | tee $OUTPUT_FILE; 37 | 38 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE 39 | rm -rf $OUTPUT_FILE 40 | -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Select the vLLM rollout implementation based on the installed vllm version."""

from importlib.metadata import version, PackageNotFoundError

###
# [SUPPORT AMD:]
import torch
###


def get_version(pkg):
    """Return the installed version string of *pkg*, or None if absent."""
    try:
        return version(pkg)
    except PackageNotFoundError:
        return None


package_name = 'vllm'
package_version = get_version(package_name)

###
# package_version = get_version(package_name)
# [SUPPORT AMD:]
# NOTE(review): torch.cuda.get_device_name() raises when no GPU is visible at
# import time — confirm this module is only imported in GPU processes.
if "AMD" in torch.cuda.get_device_name():
    import re
    # On AMD, presumably the reported version carries a ROCm suffix
    # (e.g. '0.6.3+rocmX'); keep only the leading 'major.minor[.patch]'.
    package_version = version(package_name)
    package_version = re.match(r'(\d+\.\d+\.?\d*)', package_version).group(1)
else:
    package_version = get_version(package_name)
###

# NOTE(review): this is a lexicographic *string* comparison — e.g.
# '0.10.0' <= '0.6.3' is True. It holds for the versions handled here, but
# verify before supporting vllm >= 0.10.
if package_version <= '0.6.3':
    vllm_mode = 'customized'
    from .vllm_rollout import vLLMRollout
    from .fire_vllm_rollout import FIREvLLMRollout
else:
    vllm_mode = 'spmd'
    from .vllm_rollout_spmd import vLLMRollout
27 | permissions: 28 | contents: read 29 | 30 | jobs: 31 | sanity: 32 | runs-on: ubuntu-latest 33 | timeout-minutes: 5 # Increase this timeout value as needed 34 | strategy: 35 | matrix: 36 | python-version: ["3.10"] 37 | steps: 38 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 41 | with: 42 | python-version: ${{ matrix.python-version }} 43 | - name: Install the current repository 44 | run: | 45 | pip install -e .[test] 46 | - name: Run sanity test 47 | run: | 48 | pytest -s -x tests/sanity 49 | - name: Run utility test 50 | run: | 51 | pytest -s -x tests/utility 52 | - name: Run license test 53 | run: | 54 | python3 tests/sanity/check_license.py --directory . 55 | -------------------------------------------------------------------------------- /.github/workflows/sandbox.yml: -------------------------------------------------------------------------------- 1 | name: sandbox 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/sandbox.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - .github/workflows/sandbox.yml 20 | 21 | # Cancel jobs on the same ref if a new one is triggered 22 | concurrency: 23 | group: ${{ github.workflow }}-${{ github.ref }} 24 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 25 | 26 | # Declare permissions just read content. 
27 | permissions: 28 | contents: read 29 | 30 | jobs: 31 | sandbox: 32 | runs-on: [self-hosted, l20-0] 33 | timeout-minutes: 3 # Increase this timeout value as needed 34 | env: 35 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 36 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 37 | NO_PROXY: "localhost,127.0.0.1" 38 | HF_HUB_ENABLE_HF_TRANSFER: 1 39 | container: 40 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 41 | options: --gpus all --shm-size=10g 42 | steps: 43 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 44 | with: 45 | fetch-depth: 0 46 | - name: Install the current repository 47 | run: | 48 | pip3 install hf_transfer 49 | pip3 install -e .[test,prime] 50 | pip3 install vllm==0.5.4 51 | - name: Running sandbox tests on 8 L20 GPUs 52 | run: | 53 | cd tests/sandbox 54 | pytest -s -x . 55 | -------------------------------------------------------------------------------- /.github/workflows/e2e_gsm8k_prime.yml: -------------------------------------------------------------------------------- 1 | name: e2e_gsm8k_prime 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_gsm8k_prime.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - "verl/trainer/config/*.yaml" 20 | - .github/workflows/e2e_gsm8k_prime.yml 21 | - "tests/e2e/*.sh" 22 | 23 | # Declare permissions just read content. 
24 | permissions: 25 | contents: read 26 | 27 | jobs: 28 | e2e_gsm8k: 29 | runs-on: [self-hosted, l20-1] 30 | timeout-minutes: 40 # Increase this timeout value as needed 31 | env: 32 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 33 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 34 | NO_PROXY: "localhost,127.0.0.1" 35 | HF_HUB_ENABLE_HF_TRANSFER: 1 36 | container: 37 | image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2 38 | options: --gpus all --shm-size=10g 39 | steps: 40 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 41 | with: 42 | fetch-depth: 0 43 | - name: Install the current repository 44 | run: | 45 | pip3 install hf_transfer 46 | pip3 install -e .[test,gpu] 47 | - name: Prepare gsm8k dataset 48 | run: | 49 | ray stop --force 50 | python3 examples/data_preprocess/gsm8k.py 51 | - name: Running gsm8k e2e with prime alg 52 | run: | 53 | ray stop --force 54 | bash tests/e2e/run_qwen_gsm8k_prime.sh -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class MemoryBuffer:
    """A flat, pre-allocated CUDA tensor from which fixed-shape views are carved."""

    def __init__(self, numel, numel_padded, dtype):
        # numel is the logical capacity; numel_padded is the actual allocation.
        self.numel = numel
        self.numel_padded = numel_padded
        self.dtype = dtype
        self.data = torch.zeros(numel_padded,
                                dtype=dtype,
                                device=torch.cuda.current_device(),
                                requires_grad=False)

    def zero(self):
        """Reset the buffer to zero."""
        self.data.zero_()

    def get(self, shape, start_index):
        """Return a tensor of ``shape`` viewing the 1-D data at ``start_index``."""
        stop_index = start_index + shape.numel()
        assert stop_index <= self.numel, 'requested tensor is out of the buffer range.'
        return self.data[start_index:stop_index].view(shape)
class MegatronWorker(Worker):
    """Worker exposing Megatron parallel-state topology (tp/dp/pp) information."""

    def __init__(self, cuda_visible_devices=None) -> None:
        super().__init__(cuda_visible_devices)

    def get_megatron_global_info(self):
        """Return the world sizes of the tensor-, data- and pipeline-parallel groups."""
        from megatron.core import parallel_state as mpu
        return DistGlobalInfo(tp_size=mpu.get_tensor_model_parallel_world_size(),
                              dp_size=mpu.get_data_parallel_world_size(),
                              pp_size=mpu.get_pipeline_model_parallel_world_size())

    def get_megatron_rank_info(self):
        """Return this process's rank within each parallel group."""
        from megatron.core import parallel_state as mpu
        return DistRankInfo(tp_rank=mpu.get_tensor_model_parallel_rank(),
                            dp_rank=mpu.get_data_parallel_rank(),
                            pp_rank=mpu.get_pipeline_model_parallel_rank())
27 | permissions: 28 | contents: read 29 | 30 | jobs: 31 | ray: 32 | runs-on: [self-hosted, l20-0] 33 | timeout-minutes: 5 # Increase this timeout value as needed 34 | env: 35 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 36 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 37 | NO_PROXY: "localhost,127.0.0.1" 38 | HF_HUB_ENABLE_HF_TRANSFER: 1 39 | container: 40 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 41 | options: --gpus all --shm-size=10g 42 | steps: 43 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 44 | with: 45 | fetch-depth: 0 46 | - name: Install the current repository 47 | run: | 48 | pip install hf_transfer 49 | pip install -e .[test] 50 | pip install --upgrade "ray>=2.40.0" 51 | - name: Running ray tests that need 8 GPUs 52 | run: | 53 | cd tests/ray 54 | pytest -s -x --ignore=test_check_worker_alive.py --ignore=test_rvdz.py . 55 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.actor.use_kl_loss=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 
20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | algorithm.use_kl_in_reward=True \ 26 | algorithm.kl_penalty=kl \ 27 | algorithm.kl_ctrl.kl_coef=0.001 \ 28 | algorithm.adv_estimator=remax \ 29 | trainer.critic_warmup=0 \ 30 | trainer.logger=['console'] \ 31 | trainer.project_name='verl_example_gsm8k' \ 32 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 33 | trainer.n_gpus_per_node=8 \ 34 | trainer.nnodes=1 \ 35 | trainer.save_freq=-1 \ 36 | trainer.total_training_steps=1 $@ 37 | -------------------------------------------------------------------------------- /.github/workflows/e2e_ascend.yml: -------------------------------------------------------------------------------- 1 | name: e2e_ascend 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_ascend.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - .github/workflows/e2e_ascend.yml 20 | 21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | test: 26 | name: verl Ascend test (self-host) 27 | runs-on: [self-hosted, npu-0] 28 | timeout-minutes: 5 # Increase this timeout value as needed 29 | env: 30 | HF_HUB_ENABLE_HF_TRANSFER: 1 31 | container: 32 | image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 33 | volumes: 34 | - /usr/local/dcmi:/usr/local/dcmi 35 | - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi 36 | - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ 37 | # Use self-host cache speed up pip and model download 38 | # - /home/action/actions-runner/_work/cache:/github/home/.cache/ 39 | options: >- 40 | --device /dev/davinci0 41 | --device 
def get_megatron_optimizer(
    model,
    config: OptimizerConfig,
    no_weight_decay_cond=None,
    scale_lr_cond=None,
    lr_mult=1.0,
    check_for_nan_in_loss_and_grad=False,
    overlap_param_gather=False  # add for verl
):
    """Build a Megatron-Core optimizer for the given model chunks.

    Thin wrapper around ``megatron.core.optimizer.get_megatron_optimizer``.

    Args:
        model: model chunk(s) to optimize; forwarded as ``model_chunks``.
        config: Megatron-Core ``OptimizerConfig`` describing the optimizer.
        no_weight_decay_cond: optional predicate selecting parameters that
            should skip weight decay.
        scale_lr_cond: optional predicate selecting parameters whose learning
            rate is scaled by ``lr_mult``.
        lr_mult: learning-rate multiplier for parameters matched by
            ``scale_lr_cond``.
        check_for_nan_in_loss_and_grad: accepted for interface compatibility
            but NOT forwarded to the native optimizer here.
        overlap_param_gather: verl-specific flag; accepted for interface
            compatibility but NOT forwarded to the native optimizer here.

    Returns:
        The optimizer instance created by Megatron-Core.
    """
    # Base optimizer.
    return get_megatron_optimizer_native(config=config,
                                         model_chunks=model,
                                         no_weight_decay_cond=no_weight_decay_cond,
                                         scale_lr_cond=scale_lr_cond,
                                         lr_mult=lr_mult)
def test():
    """End-to-end check that a dead worker aborts the driver process.

    Launches ``check_worker_alive/main.py`` as a subprocess, waits until it
    reports that worker ``foo`` has started, then waits 1.5x the worker's
    WAIT_TIME so the framework can detect the worker death and signal the
    driver. The driver process must have exited with a non-zero return code.
    """
    wait_time = 10

    my_env = os.environ.copy()
    my_env["WAIT_TIME"] = str(wait_time)

    p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)

    # Read the subprocess output line by line until the startup marker shows
    # up. The previous implementation called p.stdout.read(), which blocks
    # until EOF (i.e. until the subprocess exits) and therefore could not
    # poll for the marker while the process was still running.
    deadline = time.time() + 40
    output = b""
    while b"foo started" not in output:
        output += p.stdout.readline()
        if time.time() > deadline:
            raise RuntimeError("timeout for start foo in check_worker_alive/main.py")

    print(
        time.time(),
        f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time")
    time.sleep(wait_time * 1.5)
    print(time.time(), "start checking")
    assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
    assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
    print("test passed")
You can pull it directly with the following command:
55 | -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 main_ppo_split.py \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.actor.use_kl_loss=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size_per_gpu=8 \ 27 | critic.model.fsdp_config.param_offload=False \ 28 | critic.model.fsdp_config.optimizer_offload=False \ 29 | algorithm.use_kl_in_reward=False \ 30 | trainer.critic_warmup=0 \ 31 | trainer.logger=['console','wandb'] \ 32 | trainer.project_name='verl_example_gsm8k' \ 33 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 34 | trainer.n_gpus_per_node=8 \ 35 | trainer.nnodes=1 \ 36 | trainer.save_freq=-1 \ 37 | trainer.total_epochs=15 $@ 38 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 25 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 26 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 27 | algorithm.use_kl_in_reward=False \ 28 | algorithm.adv_estimator=grpo \ 29 | trainer.critic_warmup=0 \ 30 | trainer.logger=['console'] \ 31 | trainer.project_name='verl_example_gsm8k' \ 32 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 33 | trainer.n_gpus_per_node=8 \ 34 | trainer.nnodes=1 \ 35 | trainer.save_freq=-1 \ 36 | trainer.total_training_steps=1 $@ 37 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_fire_sampling.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | 
OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.val_batch_size=200 \ 16 | data.max_prompt_length=16 \ 17 | data.max_response_length=32 \ 18 | data.return_raw_input_ids=True \ 19 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 20 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \ 22 | actor_rollout_ref.actor.entropy_coeff=0 \ 23 | actor_rollout_ref.actor.optim.lr=1e-4 \ 24 | actor_rollout_ref.actor.use_kl_loss=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 26 | actor_rollout_ref.rollout.name=hf \ 27 | actor_rollout_ref.rollout.use_fire_sampling=True \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 29 | critic.ppo_micro_batch_size_per_gpu=200 \ 30 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 31 | critic.optim.lr=1e-3 \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.total_epochs=200 \ 34 | trainer.experiment_name=arithmetic_sequences \ 35 | trainer.logger=['console'] \ 36 | trainer.n_gpus_per_node=1 \ 37 | trainer.test_freq=1 \ 38 | trainer.save_freq=110 | tee $OUTPUT_FILE; 39 | 40 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE --target 0.19 41 | rm -rf $OUTPUT_FILE 42 | -------------------------------------------------------------------------------- /.github/workflows/e2e_vlm_geo3k.yml: -------------------------------------------------------------------------------- 1 | name: e2e_vlm_geo3k 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | 
paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_vlm_geo3k.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - .github/workflows/e2e_vlm_geo3k.yml 20 | - "tests/e2e/*.sh" 21 | 22 | # Declare permissions just read content. 23 | permissions: 24 | contents: read 25 | 26 | jobs: 27 | e2e_vlm_geo3k: 28 | runs-on: [self-hosted, l20-1] 29 | timeout-minutes: 10 # Increase this timeout value as needed 30 | env: 31 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 32 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 33 | NO_PROXY: "localhost,127.0.0.1" 34 | HF_HUB_ENABLE_HF_TRANSFER: 1 35 | container: 36 | image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2 37 | options: --gpus all --shm-size=40g 38 | steps: 39 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 40 | with: 41 | fetch-depth: 0 42 | - name: Install the current repository 43 | run: | 44 | pip3 install hf_transfer 45 | pip3 install -e .[test,geo,vllm] 46 | python -c "import transformers; print(transformers.__version__)" 47 | - name: Prepare geo3k dataset 48 | run: | 49 | ray stop --force 50 | python3 examples/data_preprocess/geo3k.py 51 | - name: Running geo3k vlm e2e training tests on 8 L20 GPUs with rmpad using function rm 52 | run: | 53 | ray stop --force 54 | bash tests/e2e/run_qwen2vl_geo3k_function_rm.sh 55 | -------------------------------------------------------------------------------- /.github/workflows/e2e_digit_completion.yml: -------------------------------------------------------------------------------- 1 | name: e2e_digit_completion 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_digit_completion.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - "verl/trainer/config/*.yaml" 20 | - .github/workflows/e2e_digit_completion.yml 21 | - 
"tests/e2e/*.sh" 22 | 23 | # Cancel jobs on the same ref if a new one is triggered 24 | concurrency: 25 | group: ${{ github.workflow }}-${{ github.ref }} 26 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 27 | 28 | # Declare permissions just read content. 29 | permissions: 30 | contents: read 31 | 32 | jobs: 33 | e2e_digit_completion: 34 | runs-on: [self-hosted, l20-0] 35 | timeout-minutes: 20 # Increase this timeout value as needed 36 | env: 37 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 38 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 39 | NO_PROXY: "localhost,127.0.0.1" 40 | HF_HUB_ENABLE_HF_TRANSFER: 1 41 | container: 42 | image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2 43 | options: --gpus all --shm-size=10g 44 | steps: 45 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 46 | with: 47 | fetch-depth: 0 48 | - name: Install the current repository 49 | run: | 50 | pip3 install hf_transfer 51 | pip3 install -e .[test] 52 | - name: Running digit completon e2e training tests on 8 L20 GPUs 53 | run: | 54 | ray stop --force 55 | bash tests/e2e/run_ray_trainer.sh 56 | -------------------------------------------------------------------------------- /docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- 1 | Add models with the Megatron-LM backend 2 | ========================================= 3 | 4 | Model 5 | ----------- 6 | 7 | The most challenging aspect to use the Megatron-LM backend is implementing 8 | the models for training. Currently, we implement Llama model that 9 | support data parallelism, tensor parallelism, pipeline parallelism (also 10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) on Llama 11 | model, which can be found in `modeling_llama_megatron.py `_. 12 | 13 | To support other model, users are required to implement: 14 | 15 | 1. 
Implement a model similar to ``modeling_llama_megatron.py`` that satisfies the
import ray


@ray.remote
class TestWorker:
    """Ray actor used to exercise NCCL rendezvous across GPU workers."""

    def __init__(self, rank, world_size, group_name):
        # Rank of this worker within the rendezvous group.
        self.rank = rank
        # Total number of workers participating in the group.
        self.world_size = world_size
        self.group_name = group_name
        # Populated by init(); stays None if the communicator was never created.
        self.communicator = None

    def init(self):
        """Create the NCCL communicator through verl's ray rendezvous backend."""
        from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray
        self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name)

    def test(self):
        """Return this worker's rank as reported by the communicator, or None if uninitialized."""
        if self.communicator is None:
            return None
        return self.communicator.rank_id()


def test_rvdz():
    """Spawn two single-GPU workers and verify NCCL rendezvous assigns ranks [0, 1]."""
    ray.init()

    group_name = "test_group"
    world_size = 2

    workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)]

    ray.get([worker.init.remote() for worker in workers])

    ranks = ray.get([worker.test.remote() for worker in workers])

    assert ranks == [0, 1], f"expecting [0, 1], got {ranks}"

    ray.shutdown()
27 | permissions: 28 | contents: read 29 | 30 | jobs: 31 | yapf: 32 | runs-on: ubuntu-latest 33 | strategy: 34 | matrix: 35 | python-version: ["3.12"] 36 | steps: 37 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 38 | # - name: checkout 39 | # run: | 40 | # commits=${{ github.event.pull_request.commits }} 41 | # if [[ -n "$commits" ]]; then 42 | # # Prepare enough depth for diffs with main 43 | # git fetch --depth="$(( commits + 1 ))" 44 | # fi 45 | - name: Set up Python ${{ matrix.python-version }} 46 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install --upgrade yapf 53 | pip install toml==0.10.2 54 | - name: Running yapf 55 | run: | 56 | yapf -r -vv -d --style=./.style.yapf verl tests examples recipe 57 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py

from typing import Optional

from transformers import PreTrainedTokenizer
from vllm.transformers_utils.tokenizer_group import TokenizerGroup
from vllm.utils import LRUCache


class TokenizerGroup(TokenizerGroup):
    """A group of tokenizers that can be used for LoRA adapters.

    NOTE(review): this class deliberately shadows the imported vLLM
    ``TokenizerGroup`` and its ``__init__`` does not call
    ``super().__init__()`` — it accepts an already-constructed tokenizer
    object rather than loading one. Presumably the attributes set here are
    the ones the base class's methods read; confirm against the vLLM 0.6.3
    implementation before modifying.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int,
                 max_input_length: Optional[int]):
        self.enable_lora = enable_lora
        self.max_input_length = max_input_length
        # The pre-built tokenizer instance shared by all requests.
        self.tokenizer = tokenizer
        # LRU cache of per-adapter tokenizers; unused (None) when LoRA is off.
        self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None

    # FIXME(sgm): for simplicity, we assign the special token here
    @property
    def pad_token_id(self):
        # Delegate to the wrapped tokenizer's pad token id.
        return self.tokenizer.pad_token_id

    @property
    def eos_token_id(self):
        # Delegate to the wrapped tokenizer's eos token id.
        return self.tokenizer.eos_token_id
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=False \ 26 | critic.model.path=google/gemma-2-2b-it \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_example' \ 35 | trainer.experiment_name='gemma2b_function_rm' \ 36 | trainer.n_gpus_per_node=2 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=10 \ 40 | trainer.total_epochs=15 $@ 41 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common modelzoo such as huggingface/transformers stuggles when using Pytorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized with packed inputs in verl. 
3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask. 15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Following the infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward. 
26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 17 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | 
algorithm.use_kl_in_reward=False \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /.github/workflows/e2e_lora.yml: -------------------------------------------------------------------------------- 1 | name: e2e_lora 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_lora.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - .github/workflows/e2e_lora.yml 20 | - "tests/e2e/*.sh" 21 | 22 | # Cancel jobs on the same ref if a new one is triggered 23 | concurrency: 24 | group: ${{ github.workflow }}-${{ github.ref }} 25 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 26 | 27 | # Declare permissions just read content. 
28 | permissions: 29 | contents: read 30 | 31 | jobs: 32 | e2e_lora: 33 | runs-on: [self-hosted, l20-1] 34 | timeout-minutes: 5 # Increase this timeout value as needed 35 | env: 36 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 37 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 38 | NO_PROXY: "localhost,127.0.0.1" 39 | HF_HUB_ENABLE_HF_TRANSFER: 1 40 | container: 41 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 42 | options: --gpus all --shm-size=10g 43 | steps: 44 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 45 | with: 46 | fetch-depth: 0 47 | - name: Install the current repository 48 | run: | 49 | pip3 install hf_transfer peft 50 | pip3 install -e .[test] 51 | - name: Prepare gsm8k dataset 52 | run: | 53 | ray stop --force 54 | python3 examples/data_preprocess/gsm8k.py 55 | - name: Running gsm8k e2e training tests with LoRA 56 | run: | 57 | ray stop --force 58 | bash tests/sft/run_sft_qwen05_peft.sh 8 $HOME/ckpts/ 59 | rm -rf $HOME/ckpts/* -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""
Contain small python utility functions
"""

from typing import Dict
from types import SimpleNamespace


def union_two_dict(dict1: Dict, dict2: Dict) -> Dict:
    """Merge ``dict2`` into ``dict1`` in place and return ``dict1``.

    Args:
        dict1: destination dict; mutated by this call.
        dict2: source dict whose items are copied into ``dict1``.

    Returns:
        ``dict1``, now containing every item of ``dict2``.

    Raises:
        AssertionError: if a key exists in both dicts with unequal values.
    """
    for key, val in dict2.items():
        if key in dict1:
            # Fixed message: refer to the actual parameter names (dict1/dict2,
            # not meta_dict1/meta_dict2) and to value equality, which is what
            # the `==` check actually tests.
            assert dict2[key] == dict1[key], \
                f'value of {key} in dict1 and dict2 are not equal'
        dict1[key] = val

    return dict1


def append_to_dict(data: Dict, new_data: Dict) -> None:
    """Append each value of ``new_data`` to the list stored under the same
    key in ``data``, creating an empty list the first time a key is seen.

    Args:
        data: accumulator mapping key -> list of values; mutated in place.
        new_data: mapping of key -> single value to append.
    """
    for key, val in new_data.items():
        # setdefault replaces the manual "if key not in data" initialization.
        data.setdefault(key, []).append(val)


class NestedNamespace(SimpleNamespace):
    """A SimpleNamespace built recursively from a (possibly nested) dict,
    so that ``ns.a.b`` works for an input like ``{'a': {'b': ...}}``."""

    def __init__(self, dictionary: Dict, **kwargs):
        super().__init__(**kwargs)
        for key, value in dictionary.items():
            if isinstance(value, dict):
                # Recurse so nested dicts become nested namespaces.
                setattr(self, key, NestedNamespace(value))
            else:
                setattr(self, key, value)
"""
The base class for reward model
"""

from abc import ABC, abstractmethod

from verl import DataProto


class BasePPORewardModel(ABC):
    """Abstract interface that every PPO reward model must implement."""

    def __init__(self, config):
        # Keep the full config object; concrete subclasses read their own
        # fields from it.
        self.config = config

    @abstractmethod
    def compute_reward(self, data: DataProto) -> DataProto:
        """Compute rewards for a batch of token sequences.

        Args:
            data: must contain keys "input_ids", "attention_mask" and
                "position_ids", each of shape [batch_size, sequence_length].

        Returns:
            A data protocol object containing "reward" of shape
            [batch_size, sequence_length]. Only the [EOS] position carries
            the reward; all other positions are zero. The token-level shape
            is kept so dense rewards can be supported later without an
            interface change.
        """
        pass
trainer.save_freq=-1 \ 40 | trainer.total_training_steps=1 $@ 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | ENGINE=${1:-vllm} 3 | 4 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 5 | 6 | python3 -m verl.trainer.main_ppo \ 7 | algorithm.adv_estimator=gae \ 8 | data.train_files=$HOME/data/gsm8k/train.parquet \ 9 | data.val_files=$HOME/data/gsm8k/test.parquet \ 10 | data.train_batch_size=1024 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=512 \ 13 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 19 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 20 | actor_rollout_ref.actor.use_kl_loss=False \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 23 | actor_rollout_ref.rollout.name=$ENGINE \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 28 | critic.model.enable_gradient_checkpointing=False \ 29 | critic.ppo_micro_batch_size_per_gpu=4 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | 
trainer.save_freq=1 \ 40 | trainer.resume_mode=disable \ 41 | trainer.total_training_steps=1 42 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=1024 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.actor.entropy_coeff=0 \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | 
trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=1024 \ 12 | data.filter_overlong_prompts=True \ 13 | data.truncation='error' \ 14 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 15 | actor_rollout_ref.actor.optim.lr=1e-6 \ 16 | actor_rollout_ref.model.use_remove_padding=True \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 19 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 20 | actor_rollout_ref.actor.use_kl_loss=True \ 21 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 22 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 23 | actor_rollout_ref.actor.entropy_coeff=0 \ 24 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 25 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 26 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 27 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 28 | actor_rollout_ref.rollout.name=vllm \ 29 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 30 | actor_rollout_ref.rollout.n=5 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_grpo_example_gsm8k' \ 36 | trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \ 37 | 
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sanity check: every .py file under a directory carries a known license header."""

from argparse import ArgumentParser
from pathlib import Path

license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
# Add custom license headers below
license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"

license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime]

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--directory', '-d', required=True, type=str)
    args = parser.parse_args()

    # Recursively scan every Python file under the given directory.
    for path in Path(args.directory).glob('**/*.py'):
        path_in_str = str(path.absolute())
        print(path_in_str)
        file_content = path.read_text(encoding='utf-8')

        # any() replaces the original manual flag-and-break loop.
        has_license = any(lh in file_content for lh in license_headers)
        assert has_license, f'file {path_in_str} does not contain license'
actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 21 | actor_rollout_ref.actor.optim.lr=1e-6 \ 22 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 23 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 24 | actor_rollout_ref.actor.use_kl_loss=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 29 | critic.optim.lr=1e-5 \ 30 | critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 31 | critic.model.enable_gradient_checkpointing=False \ 32 | critic.ppo_micro_batch_size_per_gpu=4 \ 33 | algorithm.use_kl_in_reward=False \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_megatron_math_gsm8k_examples' \ 37 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=100 $@ 43 | -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export HF_DATASETS_OFFLINE=1 4 | export TRANSFORMERS_OFFLINE=1 5 | 6 | export VLLM_ATTENTION_BACKEND=XFORMERS 7 | 8 | python3 -m verl.trainer.main_ppo \ 9 | algorithm.adv_estimator=remax \ 10 | data.train_files=$HOME/data/gsm8k/train.parquet \ 11 | data.val_files=$HOME/data/gsm8k/test.parquet \ 12 | data.train_batch_size=512 \ 13 | data.max_prompt_length=512 \ 14 | data.max_response_length=1024 \ 15 | data.filter_overlong_prompts=True \ 16 | data.truncation='error' \ 17 | actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \ 18 | actor_rollout_ref.actor.optim.lr=1e-6 \ 19 | actor_rollout_ref.model.use_remove_padding=True \ 20 | 
actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 21 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 22 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=30000 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 25 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 26 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 27 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 28 | actor_rollout_ref.rollout.name=vllm \ 29 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ 30 | actor_rollout_ref.rollout.n=4 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.use_kl_in_reward=True \ 33 | algorithm.kl_penalty=kl \ 34 | algorithm.kl_ctrl.kl_coef=0.001 \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console','wandb'] \ 37 | trainer.project_name='verl_remax_example_gsm8k' \ 38 | trainer.experiment_name='qwen2.5_3b_function_rm_kl1e-3' \ 39 | trainer.val_before_train=False \ 40 | trainer.n_gpus_per_node=8 \ 41 | trainer.nnodes=1 \ 42 | trainer.save_freq=-1 \ 43 | trainer.test_freq=5 \ 44 | trainer.total_epochs=5 $@ 45 | -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export HF_DATASETS_OFFLINE=1 4 | export TRANSFORMERS_OFFLINE=1 5 | 6 | export VLLM_ATTENTION_BACKEND=XFORMERS 7 | 8 | python3 -m verl.trainer.main_ppo \ 9 | algorithm.adv_estimator=remax \ 10 | data.train_files=$HOME/data/gsm8k/train.parquet \ 11 | data.val_files=$HOME/data/gsm8k/test.parquet \ 12 | data.train_batch_size=1024 \ 13 | data.max_prompt_length=512 \ 14 | data.max_response_length=1024 \ 15 | data.filter_overlong_prompts=True \ 16 | data.truncation='error' \ 17 | actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \ 18 | actor_rollout_ref.actor.optim.lr=1e-6 \ 19 | 
actor_rollout_ref.model.use_remove_padding=True \ 20 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 21 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 22 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 25 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 26 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 27 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 28 | actor_rollout_ref.rollout.name=vllm \ 29 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ 30 | actor_rollout_ref.rollout.n=4 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.use_kl_in_reward=True \ 33 | algorithm.kl_penalty=kl \ 34 | algorithm.kl_ctrl.kl_coef=0.001 \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console','wandb'] \ 37 | trainer.project_name='verl_remax_example_gsm8k' \ 38 | trainer.experiment_name='qwen2.5_7b_function_rm_kl1e-3' \ 39 | trainer.val_before_train=False \ 40 | trainer.n_gpus_per_node=8 \ 41 | trainer.nnodes=1 \ 42 | trainer.save_freq=-1 \ 43 | trainer.test_freq=5 \ 44 | trainer.total_epochs=10 $@ 45 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | mkdir -p $HOME/models 6 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 7 | 8 | export VLLM_ATTENTION_BACKEND=XFORMERS 9 | 10 | python3 -m verl.trainer.main_ppo --config-path=config \ 11 | --config-name='ppo_megatron_trainer.yaml'\ 12 | algorithm.adv_estimator=gae \ 13 | data.train_files=$HOME/data/gsm8k/train.parquet \ 14 | data.val_files=$HOME/data/gsm8k/test.parquet \ 15 | data.train_batch_size=1024 \ 16 | data.max_prompt_length=512 \ 17 
| data.max_response_length=512 \ 18 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 19 | actor_rollout_ref.actor.optim.lr=2e-6 \ 20 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 22 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 28 | critic.optim.lr=2e-5 \ 29 | critic.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 30 | critic.model.enable_gradient_checkpointing=False \ 31 | critic.ppo_micro_batch_size_per_gpu=4 \ 32 | critic.megatron.tensor_model_parallel_size=2 \ 33 | algorithm.use_kl_in_reward=False \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console'] \ 36 | trainer.project_name='verl_megatron_gsm8k_examples' \ 37 | trainer.experiment_name='qwen2_5_0b5_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=1 \ 42 | trainer.total_epochs=15 \ 43 | trainer.total_training_steps=3 $@ 44 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 6 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 7 | math_train_path=$HOME/data/math/train.parquet 8 | math_test_path=$HOME/data/math/test.parquet 9 | 10 | train_files="['$gsm8k_train_path', '$math_train_path']" 11 | test_files="['$gsm8k_test_path', '$math_test_path']" 12 | 13 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 14 | 
algorithm.adv_estimator=gae \ 15 | data.train_files="$train_files" \ 16 | data.val_files="$test_files" \ 17 | data.train_batch_size=1024 \ 18 | data.max_prompt_length=1024 \ 19 | data.max_response_length=512 \ 20 | data.filter_overlong_prompts=True \ 21 | data.truncation='error' \ 22 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 23 | actor_rollout_ref.actor.optim.lr=1e-6 \ 24 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 25 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 26 | actor_rollout_ref.actor.use_kl_loss=False \ 27 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 31 | critic.optim.lr=1e-5 \ 32 | critic.model.path=Qwen/Qwen2-7B-Instruct \ 33 | critic.model.enable_gradient_checkpointing=False \ 34 | critic.ppo_micro_batch_size_per_gpu=4 \ 35 | algorithm.use_kl_in_reward=False \ 36 | trainer.critic_warmup=0 \ 37 | trainer.logger=['console','wandb'] \ 38 | trainer.project_name='verl_megatron_math_gsm8k_examples' \ 39 | trainer.experiment_name='qwen2_7b_function_rm' \ 40 | trainer.n_gpus_per_node=8 \ 41 | trainer.nnodes=1 \ 42 | trainer.save_freq=-1 \ 43 | trainer.test_freq=5 \ 44 | trainer.total_epochs=100 $@ 45 | -------------------------------------------------------------------------------- /examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=rloo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=1024 \ 12 | data.filter_overlong_prompts=True \ 13 | data.truncation='error' \ 14 | 
actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 15 | actor_rollout_ref.actor.optim.lr=1e-6 \ 16 | actor_rollout_ref.model.use_remove_padding=True \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.use_kl_in_reward=True \ 32 | algorithm.kl_penalty=kl \ 33 | algorithm.kl_ctrl.kl_coef=0.001 \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_rloo_example_gsm8k' \ 37 | trainer.experiment_name='qwen2_7b_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def test_flash_attn_cross_entropy():
    """Check flash-attn's fused Triton cross-entropy against verl's naive
    log-prob reference on bf16 activations (requires CUDA + flash_attn)."""
    import torch
    from torch import nn

    from flash_attn.ops.triton.cross_entropy import cross_entropy_loss

    from verl.utils.debug import log_gpu_memory_usage
    from verl.utils.torch_functional import logprobs_from_logits_naive

    log_gpu_memory_usage('At start')

    seq_len, hidden_dim, vocab_size = 2048, 5120, 155136
    hidden_states = torch.randn(size=(seq_len, hidden_dim), device='cuda', requires_grad=True, dtype=torch.bfloat16)
    linear = nn.Linear(in_features=hidden_dim, out_features=vocab_size, bias=False, device='cuda', dtype=torch.bfloat16)
    logits = linear(hidden_states)
    labels = torch.randint(low=0, high=vocab_size, size=(seq_len,), device='cuda')

    log_gpu_memory_usage('before computation')
    # cross_entropy_loss returns a tuple; the first element is the per-token
    # loss, so its negation gives the log-probabilities.
    output = -cross_entropy_loss(logits, labels)[0]
    log_gpu_memory_usage('After forward')
    output.sum().backward()
    log_gpu_memory_usage('After backward')

    # Compare against the fp32 naive implementation.
    groundtruth = logprobs_from_logits_naive(logits.float(), labels)
    torch.testing.assert_close(output, groundtruth)
# Copyright 2023 The vLLM team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader

from typing import Dict

import torch.nn as nn
from vllm.model_executor.model_loader.utils import set_default_torch_dtype


def update_hf_weight_loader():
    """No-op kept for API symmetry with other vllm versions; v0.6.3 needs no
    patched HF weight loader."""
    print("no hf weight loader need to be updated")


def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
    """Load a HuggingFace-format state dict into a vllm model in place.

    Args:
        actor_weights: mapping from parameter name to weight tensor.
        vllm_model: the vllm module to receive the weights; moved to CUDA
            after loading.
    """
    # isinstance check against the concrete builtin; isinstance with
    # typing.Dict is deprecated.
    assert isinstance(actor_weights, dict)
    with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
        # With tied embeddings vllm shares lm_head with the input embedding,
        # so drop the duplicate entry before loading.
        if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights:
            del actor_weights["lm_head.weight"]
        vllm_model.load_weights(actor_weights.items())
    for _, module in vllm_model.named_modules():
        quant_method = getattr(module, "quant_method", None)
        if quant_method is not None:
            quant_method.process_weights_after_loading(module)
        # FIXME: Remove this after Mixtral is updated
        # to use quant_method.
        if hasattr(module, "process_weights_after_loading"):
            module.process_weights_after_loading()
    vllm_model = vllm_model.cuda()
37 | trainer.experiment_name='qwen2_7b_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 28 | critic.model.enable_gradient_checkpointing=True \ 29 | critic.ppo_micro_batch_size_per_gpu=32 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | 
trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=1 \ 41 | trainer.total_epochs=15 $@ 42 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo --config-path=config \ 4 | --config-name='ppo_megatron_trainer.yaml'\ 5 | algorithm.adv_estimator=grpo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=1024 \ 11 | data.filter_overlong_prompts=True \ 12 | data.truncation='error' \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \ 19 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \ 20 | actor_rollout_ref.actor.use_kl_loss=True \ 21 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 22 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 23 | actor_rollout_ref.actor.entropy_coeff=0 \ 24 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=5 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 31 | algorithm.use_kl_in_reward=False \ 32 | 
trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm_megatron' \ 36 | trainer.n_gpus_per_node=16 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use 5 | 6 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 7 | algorithm.adv_estimator=gae \ 8 | data.train_files="$train_files" \ 9 | data.val_files="$test_files" \ 10 | data.train_batch_size=512 \ 11 | data.max_prompt_length=128 \ 12 | data.max_response_length=128 \ 13 | data.filter_overlong_prompts=True \ 14 | data.truncation='error' \ 15 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 16 | actor_rollout_ref.actor.optim.lr=1e-6 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 26 | critic.model.enable_gradient_checkpointing=False \ 27 | critic.ppo_micro_batch_size_per_gpu=4 \ 28 | reward_model.enable=True \ 29 | reward_model.megatron.tensor_model_parallel_size=4 \ 30 | reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \ 31 | reward_model.micro_batch_size_per_gpu=4 \ 32 | 
reward_model.param_offload=False \ 33 | algorithm.use_kl_in_reward=False \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_megatron_full_hh_rlhf_examples' \ 37 | trainer.experiment_name='deepseek_llm_7b_model_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=100 $@ 43 | -------------------------------------------------------------------------------- /tests/e2e/check_results.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import argparse 16 | 17 | import numpy as np 18 | 19 | 20 | def extract_reward_from_line(line): 21 | # TODO: this function needs error handling 22 | try: 23 | key_vals = line.split(' - ') 24 | for key_val in key_vals: 25 | key, val = key_val.split(':') 26 | if key == 'critic/rewards/mean': 27 | reward = float(val) 28 | return reward 29 | return -np.inf 30 | except Exception: 31 | return -np.inf 32 | 33 | 34 | if __name__ == '__main__': 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('--output_file', required=True, type=str) 37 | parser.add_argument('--target', type=float, default=0.2, help='target reward score') 38 | 39 | args = parser.parse_args() 40 | 41 | with open(args.output_file, 'r') as f: 42 | output = f.read().split('\n') 43 | 44 | best_reward = -np.inf 45 | for line in output: 46 | if line.startswith('step'): 47 | reward = extract_reward_from_line(line) 48 | if reward > best_reward: 49 | best_reward = reward 50 | 51 | print(f'Best reward is {best_reward}') 52 | assert best_reward > args.target, f'Best reward must be greater than {args.target}. 
best_reward: {best_reward}' 53 | print('Check passes') 54 | -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm0.8.sagemaker: -------------------------------------------------------------------------------- 1 | # Using a pre-built image from AWS DLC which contains the current version of python (3.10) and supported cuda version (12.1) 2 | FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:2.1.0-transformers4.36.0-gpu-py310-cu121-ubuntu20.04 3 | 4 | # uninstall nv-pytorch fork 5 | RUN pip3 uninstall -y pytorch-quantization \ 6 | pytorch-triton torch torch-tensorrt torchvision \ 7 | xgboost transformer_engine flash_attn apex megatron-core 8 | 9 | # Define environments 10 | ENV MAX_JOBS=32 11 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn 12 | ENV DEBIAN_FRONTEND=noninteractive 13 | ENV NODE_OPTIONS="" 14 | ENV HF_HUB_ENABLE_HF_TRANSFER="1" 15 | 16 | # Install systemctl 17 | RUN apt-get update && \ 18 | apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \ 19 | apt-get clean 20 | 21 | # Install tini 22 | RUN apt-get update && \ 23 | apt-get install -y tini && \ 24 | apt-get clean 25 | 26 | # Install torch-2.6.0 + vllm-0.8.2 27 | RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata==0.11.0 \ 28 | transformers>=4.49.0 accelerate datasets peft hf-transfer \ 29 | ray[default] codetiming hydra-core pandas pyarrow>=15.0.0 pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \ 30 | pytest yapf py-spy pyext pre-commit ruff 31 | 32 | # Install flash_attn-2.7.4.post1 33 | RUN pip uninstall -y transformer-engine flash-attn && \ 34 | pip install flash-attn==2.7.4.post1 --no-build-isolation 35 | 36 | # Fix cv2 37 | RUN pip uninstall -y pynvml nvidia-ml-py && \ 38 | pip install --no-cache-dir nvidia-ml-py>=12.560.30 opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \ 39 | pip install --no-cache-dir --upgrade 
optree>=0.13.0 40 | 41 | # Install verl 42 | RUN pip install --no-cache-dir verl[vllm] -U 43 | 44 | # Reset pip config 45 | RUN pip config unset global.index-url && \ 46 | pip config unset global.extra-index-url 47 | -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE . 2 | 3 | # the one in docker.io is an alias for the one veturbo 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN pip3 config unset global.index-url 10 | 11 | # transformers 4.47.0 contains the following bug: 12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask' 13 | RUN pip3 install --no-cache-dir \ 14 | torch==2.4.0 \ 15 | accelerate \ 16 | codetiming \ 17 | dill \ 18 | hydra-core \ 19 | numpy \ 20 | pybind11 \ 21 | tensordict \ 22 | "transformers <= 4.46.0" 23 | 24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation 25 | 26 | # vllm depends on ray, and veRL does not support ray > 2.37 27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10 28 | 29 | # install apex 30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 31 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 32 | git+https://github.com/NVIDIA/apex 33 | 34 | # install Transformer Engine 35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req 36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM 37 | # - cudnn is required by TransformerEngine 38 | 
# RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \ 39 | # pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation 41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 42 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from tests.e2e.envs.digit_completion import DigitCompletion, generate_ground_truth_response 16 | from torch.utils import data 17 | import os 18 | 19 | if __name__ == '__main__': 20 | simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9) 21 | all_prompts = simple_task.get_all_prompts() 22 | 23 | # 21 * 6 * 4 24 | train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2]) 25 | train_data = list(train_data) 26 | test_data = list(test_data) 27 | 28 | train_data = [[{'role': 'user', 'content': str(item)}] \ 29 | for item in train_data] 30 | test_data = [[{'role': 'user', 'content': str(item)}] \ 31 | for item in test_data] 32 | 33 | print(f'Size of train: {len(train_data)}, size of test: {len(test_data)}') 34 | 35 | train_data = {'prompt': train_data} 36 | test_data = {'prompt': test_data} 37 | 38 | model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__))) 39 | 40 | import pandas as pd 41 | 42 | train_data_frame = pd.DataFrame(train_data) 43 | test_data_frame = pd.DataFrame(test_data) 44 | 45 | train_data_frame.to_parquet(os.path.join(model_folder, 'train.parquet')) 46 | test_data_frame.to_parquet(os.path.join(model_folder, 'test.parquet')) 47 | -------------------------------------------------------------------------------- /.github/workflows/e2e_sglang_gsm8k.yml: -------------------------------------------------------------------------------- 1 | name: e2e_sglang_gsm8k 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | - v0.2.x 10 | paths: 11 | - "**/*.py" 12 | - .github/workflows/e2e_sglang_gsm8k.yml 13 | pull_request: 14 | branches: 15 | - main 16 | - v0.2.x 17 | paths: 18 | - "**/*.py" 19 | - "verl/trainer/config/*.yaml" 20 | - .github/workflows/e2e_sglang_gsm8k.yml 21 | - "tests/e2e/*.sh" 22 | 23 | # Cancel jobs on the same ref if a new one is triggered 24 | concurrency: 25 | group: ${{ github.workflow }}-${{ 
github.ref }} 26 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 27 | 28 | # Declare permissions just read content. 29 | permissions: 30 | contents: read 31 | 32 | jobs: 33 | e2e_sglang_gsm8k: 34 | runs-on: [self-hosted, l20-1] 35 | timeout-minutes: 40 # Increase this timeout value as needed 36 | env: 37 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 38 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 39 | NO_PROXY: "localhost,127.0.0.1" 40 | HF_HUB_ENABLE_HF_TRANSFER: 1 41 | container: 42 | image: ocss884/verl-sglang:ngc-th2.5.1-cu126-sglang0.4.4.post3 43 | options: --gpus all --shm-size=10g 44 | steps: 45 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 46 | with: 47 | fetch-depth: 0 48 | - name: Install the current repository 49 | run: | 50 | pip3 install hf_transfer 51 | pip3 install -e .[test,gpu,sglang] --no-deps 52 | - name: Prepare gsm8k dataset 53 | run: | 54 | ray stop --force 55 | python3 examples/data_preprocess/gsm8k.py 56 | - name: Running gsm8k e2e training tests on 8 L20 GPUs with rmpad using function rm and save ckpt 57 | run: | 58 | ray stop --force 59 | bash tests/e2e/run_qwen_gsm8k_function_rm.sh sglang 60 | 61 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | VERL_USE_MODELSCOPE=True \ 4 | python3 -m verl.trainer.main_ppo \ 5 | algorithm.adv_estimator=gae \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | data.filter_overlong_prompts=True \ 12 | data.truncation='error' \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | 
actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 18 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 19 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 20 | actor_rollout_ref.actor.use_kl_loss=False \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 26 | critic.optim.lr=1e-5 \ 27 | critic.model.use_remove_padding=True \ 28 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 29 | critic.model.enable_gradient_checkpointing=True \ 30 | critic.ppo_micro_batch_size_per_gpu=32 \ 31 | critic.model.fsdp_config.param_offload=False \ 32 | critic.model.fsdp_config.optimizer_offload=False \ 33 | algorithm.use_kl_in_reward=False \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_example_gsm8k' \ 37 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=1 \ 42 | trainer.total_epochs=15 $@ 43 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo --config-path=config \ 6 | --config-name='ppo_megatron_trainer.yaml'\ 7 | algorithm.adv_estimator=grpo \ 8 | data.train_files=$HOME/data/gsm8k/train.parquet \ 9 | data.val_files=$HOME/data/gsm8k/test.parquet \ 10 | data.train_batch_size=1024 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=1024 \ 13 | data.filter_overlong_prompts=True \ 14 | data.truncation='error' 
\ 15 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 16 | actor_rollout_ref.actor.optim.lr=1e-6 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 19 | actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \ 20 | actor_rollout_ref.actor.megatron.virtual_pipeline_model_parallel_size=2 \ 21 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \ 22 | actor_rollout_ref.actor.use_kl_loss=True \ 23 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 24 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 25 | actor_rollout_ref.actor.entropy_coeff=0 \ 26 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 27 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 31 | actor_rollout_ref.rollout.n=5 \ 32 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 33 | algorithm.use_kl_in_reward=False \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_grpo_example_gsm8k' \ 37 | trainer.experiment_name='qwen2_7b_function_rm_megatron' \ 38 | trainer.n_gpus_per_node=16 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /tests/e2e/run_qwen_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 6 | 7 | python3 -m verl.trainer.main_ppo \ 8 | algorithm.adv_estimator=grpo \ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.train_batch_size=1024 \ 12 | 
data.max_prompt_length=512 \ 13 | data.max_response_length=1024 \ 14 | data.filter_overlong_prompts=True \ 15 | data.truncation='error' \ 16 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \ 17 | actor_rollout_ref.actor.optim.lr=1e-6 \ 18 | actor_rollout_ref.model.use_remove_padding=True \ 19 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 20 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 21 | actor_rollout_ref.actor.use_kl_loss=True \ 22 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 23 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 24 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 25 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 26 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 27 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 31 | actor_rollout_ref.rollout.n=5 \ 32 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 33 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 34 | algorithm.use_kl_in_reward=False \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console'] \ 37 | trainer.project_name='verl_grpo_example_gsm8k' \ 38 | trainer.experiment_name='qwen2_7b_function_rm' \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=1 \ 41 | trainer.save_freq=-1 \ 42 | trainer.test_freq=5 \ 43 | trainer.total_epochs=15 \ 44 | trainer.total_training_steps=2 $@ -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from megatron.core import parallel_state as mpu 19 | 20 | 21 | def mark_parameter_as_sequence_parallel(parameter): 22 | setattr(parameter, 'sequence_parallel', True) 23 | 24 | 25 | def is_sequence_parallel_param(param): 26 | return hasattr(param, 'sequence_parallel') and param.sequence_parallel 27 | 28 | 29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor): 30 | """pad the tokens such that the total length is a multiple of sp world size 31 | 32 | Args: 33 | unpad_tokens: (total_nnz, ...). 
Tokens after removing padding 34 | 35 | Returns: 36 | 37 | """ 38 | total_nnz = unpad_tokens.shape[0] 39 | sp_world_size = mpu.get_tensor_model_parallel_world_size() 40 | 41 | if total_nnz % sp_world_size == 0: 42 | pad_size = 0 43 | else: 44 | pad_size = sp_world_size - total_nnz % sp_world_size 45 | 46 | if pad_size > 0: 47 | if unpad_tokens.ndim == 1: 48 | unpad_tokens = F.pad(unpad_tokens, (0, pad_size)) 49 | elif unpad_tokens.ndim == 2: 50 | unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size)) 51 | else: 52 | raise NotImplementedError(f'Padding dim {unpad_tokens.ndim()} is not supported') 53 | 54 | return unpad_tokens 55 | -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:ngc-th2.4.0-cu124-vllm0.6.3-ray2.4-te1.7-v0.0.6" -f docker/Dockerfile.ngc.vllm . --builder cloud-verlai-verl-builder --progress=plain --push 2 | FROM nvcr.io/nvidia/pytorch:24.05-py3 3 | 4 | # uninstall nv-pytorch fork 5 | RUN pip3 uninstall pytorch-quantization \ 6 | pytorch-triton \ 7 | torch \ 8 | torch-tensorrt \ 9 | torchvision \ 10 | xgboost transformer_engine flash_attn \ 11 | apex megatron-core -y 12 | 13 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124 14 | 15 | # =============== Megatron dependencies (optional) ================= 16 | # install apex, set MAX_JOBS to avoid OOMs 17 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 18 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 19 | git+https://github.com/NVIDIA/apex 20 | # =============== End of Megatron dependencies (optional) ================= 21 | 22 | RUN pip3 install --no-cache-dir \ 23 | accelerate \ 24 | codetiming \ 25 | datasets \ 26 | dill \ 27 | 
hydra-core \ 28 | numpy \ 29 | 'pandas' \ 30 | 'peft' \ 31 | 'pyarrow>=15.0.0' \ 32 | 'pybind11' \ 33 | 'pylatexenc' \ 34 | 'ray>=2.10' \ 35 | 'tensordict<0.6' \ 36 | 'transformers' \ 37 | 'vllm==0.6.3.post1' \ 38 | 'wandb' 39 | 40 | # full dependencies 41 | RUN pip3 install pytest yapf py-spy pyext liger-kernel 42 | 43 | # =============== Megatron dependencies (optional) ================= 44 | # install Transformer Engine, which requires FA 2.5.8. Do it in a separate step for docker cache 45 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation 46 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 47 | # =============== End of Megatron dependencies (optional) ================= 48 | -------------------------------------------------------------------------------- /tests/ray/test_ray_local_envs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | e2e test verl.single_controller.ray 16 | """ 17 | import os 18 | import ray 19 | 20 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup 21 | from verl.single_controller.base.worker import Worker 22 | from verl.single_controller.base.decorator import register, Dispatch, collect_all_to_all, Execute 23 | 24 | 25 | @ray.remote 26 | class TestActor(Worker): 27 | 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | def getenv(self, key): 32 | val = os.getenv(key, f"{key} not set") 33 | return val 34 | 35 | 36 | def test_basics(): 37 | ray.init() 38 | 39 | # create 4 workers, each hold a GPU 40 | resource_pool = RayResourcePool([4], use_gpu=True) 41 | class_with_args = RayClassWithInitArgs(cls=TestActor) 42 | 43 | worker_group = RayWorkerGroup(resource_pool=resource_pool, 44 | ray_cls_with_init=class_with_args, 45 | name_prefix="worker_group_basic") 46 | 47 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE") 48 | assert output == ["4", "4", "4", "4"] 49 | 50 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK") 51 | assert set(output) == set(["0", "1", "2", "3"]) 52 | 53 | ray.shutdown() 54 | 55 | 56 | if __name__ == '__main__': 57 | test_basics() 58 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | import torch 17 | from megatron.core import ModelParallelConfig 18 | from torch import nn 19 | from transformers import LlamaConfig 20 | 21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 22 | from verl.utils.megatron import sequence_parallel as sp_utils 23 | 24 | 25 | class ParallelLlamaRMSNorm(nn.Module): 26 | 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine(input=hidden_states, 43 | weight=self.weight, 44 | normalized_shape=self.normalized_shape, 45 | eps=self.variance_epsilon, 46 | memory_efficient=True) -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | import torch 17 | from megatron.core import ModelParallelConfig 18 | from torch import nn 19 | from transformers import Qwen2Config 20 | 21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 22 | from verl.utils.megatron import sequence_parallel as sp_utils 23 | 24 | 25 | class ParallelQwen2RMSNorm(nn.Module): 26 | 27 | def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig): 28 | """ 29 | Qwen2RMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine(input=hidden_states, 43 | weight=self.weight, 44 | normalized_shape=self.normalized_shape, 45 | eps=self.variance_epsilon, 46 | memory_efficient=True) -------------------------------------------------------------------------------- /tests/e2e/run_qwen2vl_geo3k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 
| huggingface-cli download Qwen/Qwen2-VL-2B-Instruct --local-dir $HOME/models/Qwen/Qwen2-VL-2B-Instruct 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/geo3k/train.parquet \ 7 | data.val_files=$HOME/data/geo3k/test.parquet \ 8 | data.train_batch_size=128 \ 9 | data.max_prompt_length=1536 \ 10 | data.max_response_length=1536 \ 11 | data.image_key=images \ 12 | actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2-VL-2B-Instruct \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 23 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 25 | actor_rollout_ref.rollout.name=vllm \ 26 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 27 | actor_rollout_ref.rollout.enable_chunked_prefill=False \ 28 | actor_rollout_ref.rollout.enforce_eager=True \ 29 | actor_rollout_ref.rollout.free_cache_engine=False \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.use_kl_in_reward=False \ 33 | algorithm.adv_estimator=grpo \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console'] \ 36 | trainer.project_name='verl_example_geo3k' \ 37 | trainer.experiment_name='qwen2vl_e2e_ci_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.total_training_steps=1 $@ 42 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | **/*.pt 3 | **/checkpoints 4 | **/wget-log 5 | **/_build/ 6 | **/*.ckpt 7 | **/outputs 8 | **/*.tar.gz 9 | **/playground 10 | **/wandb 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | dataset/* 17 | tensorflow/my_graph/* 18 | .idea/ 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | tmp/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *,cover 60 | .hypothesis/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # IPython Notebook 84 | .ipynb_checkpoints 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | venv/ 97 | .venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # vscode 107 | .vscode 108 | 109 | # Mac 110 | .DS_Store 111 | 112 | # output logs 113 | 
tests/e2e/toy_examples/deepspeed/synchronous/output.txt

# vim
*.swp

# ckpt
*.lock

# data
*.parquet


# local logs
logs
log
outputs
-------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: --------------------------------------------------------------------------------
# Example: PPO with GAE advantages (adv_estimator=gae) on GSM8K using
# deepseek-llm-7b-chat, with Ulysses sequence parallelism of 2 on both actor
# and critic. Extra hydra overrides may be appended via "$@".
set -x

# NOTE: comments must not be interleaved between the backslash-continued lines.
python3 -m verl.trainer.main_ppo \
    algorithm.adv_estimator=gae \
    data.train_files=$HOME/data/gsm8k/train.parquet \
    data.val_files=$HOME/data/gsm8k/test.parquet \
    data.train_batch_size=1024 \
    data.max_prompt_length=512 \
    data.max_response_length=512 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=256 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=32 \
    actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=64 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
    critic.optim.lr=1e-5 \
    critic.ulysses_sequence_parallel_size=2 \
    critic.model.use_remove_padding=True \
    critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
    critic.model.enable_gradient_checkpointing=True \
    critic.ppo_micro_batch_size_per_gpu=64 \
    critic.model.fsdp_config.param_offload=False \
    critic.model.fsdp_config.optimizer_offload=False \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='verl_example_gsm8k' \
    trainer.experiment_name='deepseek_llm_7b_function_rm_sp2' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=-1 \
    trainer.test_freq=5 \
    trainer.total_epochs=15 $@
-------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023 The vLLM team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models

from typing import Dict, Union, Optional, Iterable, Tuple

import torch
import torch.nn as nn

from vllm.model_executor.model_loader.utils import set_default_torch_dtype
from vllm.model_executor.model_loader.weight_utils import default_weight_loader


def update_hf_weight_loader():
    """No-op hook: the HF weight loader needs no patching for this vLLM version.

    Kept so callers can invoke the same entry point across the per-version
    vLLM shims in verl/third_party/vllm.
    """
    print('no hf weight loader need to be updated')
    return


def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
    """Load HF-format actor weights into an already-constructed vLLM model.

    Args:
        actor_weights: mapping from HF parameter names to weight tensors.
            NOTE(review): mutated in place -- ``lm_head.weight`` is deleted
            below when embeddings are tied; confirm callers do not reuse
            the dict afterwards.
        vllm_model: target model, updated in place and moved to CUDA at the end.
    """
    assert isinstance(actor_weights, Dict)
    # Create/convert parameters in the model's existing dtype while loading.
    with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
        # With tied word embeddings the LM head shares the input embedding,
        # so a separate lm_head weight must not be handed to load_weights.
        if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys():
            del actor_weights["lm_head.weight"]
        vllm_model.load_weights(actor_weights.items())
        # Re-run post-load quantization hooks so quantized layers repack the
        # freshly loaded weights.
        for _, module in vllm_model.named_modules():
            quant_method = getattr(module, "quant_method", None)
            if quant_method is not None:
                quant_method.process_weights_after_loading(module)
            # FIXME: Remove this after Mixtral is updated
            # to use quant_method.
            if hasattr(module, "process_weights_after_loading"):
                module.process_weights_after_loading()
    # .cuda() moves the module in place; the rebinding of the local name has
    # no effect on the caller (the function returns None).
    vllm_model = vllm_model.cuda()
--------------------------------------------------------------------------------