├── .gitignore ├── LICENSE ├── Notice.txt ├── README.md ├── data └── kk │ └── instruct │ ├── 3ppl │ ├── test.parquet │ └── train.parquet │ ├── 4ppl │ ├── test.parquet │ └── train.parquet │ ├── 5ppl │ ├── test.parquet │ └── train.parquet │ ├── 6ppl │ ├── test.parquet │ └── train.parquet │ └── 7ppl │ ├── test.parquet │ └── train.parquet ├── docker ├── Dockerfile.ngc.vllm └── Dockerfile.vemlp.vllm.te ├── docs ├── Makefile ├── README.md ├── _static │ └── logo.png ├── advance │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ └── placement.rst ├── conf.py ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ └── ppo_code_architecture.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── index.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── start │ ├── install.rst │ └── quickstart.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ └── ray_trainer.rst ├── eval_kk ├── compute_score.py ├── eval.sh ├── kk_processor.py ├── kk_prompt.py └── main_eval_instruct.py ├── examples ├── data_preprocess │ ├── arth.py │ ├── countdown.py │ ├── full_hh_rlhf.py │ ├── gsm8k.py │ ├── hellaswag.py │ ├── kk.py │ ├── math_dataset.py │ └── multiply.py ├── generation │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ └── run_qwen2-7b_seq_balance.sh ├── ppo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── sft │ └── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ └── run_gemma_7b.sh └── split_placement │ ├── README.md │ ├── config │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── main_grpo.sh ├── math_eval ├── aime_2021_2024.jsonl ├── amc.jsonl ├── auto_test_aime.sh ├── test_aime.py ├── test_aime.sh ├── test_amc.py └── test_amc.sh ├── patches └── megatron_v4.patch ├── pics ├── response.png ├── response_mean_length.png ├── response_mean_length_v2.png ├── teaser.png └── test_score_plot_v1.jpg ├── pyproject.toml ├── requirements.txt ├── scripts ├── curriculum.sh ├── format.sh ├── train_grpo_4gpu_7Binstruct.sh ├── train_ppo_3B_4gpu.sh ├── train_ppo_7B_4gpu.sh └── train_reinforce_plus_4gpu_7Binstruct.sh ├── setup.py ├── tests ├── __init__.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ ├── create_dataset.py │ │ │ ├── test.parquet │ │ │ └── train.parquet │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ ├── config │ │ │ └── ray_trainer.yaml │ │ │ └── main_trainer.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_ray_trainer.sh │ └── run_ray_trainer_rmpad.sh ├── gpu_utility │ ├── 
test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── rollout │ ├── run_fsdp_vllm.py │ └── test_vllm_hf_loader.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── utility │ └── test_tensor_dict_utilities.py └── verl │ └── utils │ └── dataset │ ├── test_rl_dataset.py │ ├── test_rm_dataset.py │ └── test_sft_dataset.py └── verl ├── __init__.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ └── qwen2.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py ├── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py └── version │ └── version ├── third_party ├── __init__.py └── vllm │ ├── __init__.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── 
performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── optimizer_config.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── countdown.py │ ├── gsm8k.py │ ├── kk.py │ ├── math.py │ └── multiply.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ └── vllm_rollout.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pt 2 | **/checkpoints 3 | **/wget-log 4 | **/_build/ 5 | **/*.ckpt 6 | **/outputs 7 | **/*.tar.gz 8 | **/playground 9 | **/wandb 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | dataset/* 16 | tensorflow/my_graph/* 17 | .idea/ 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # IPython Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # vscode 104 | .vscode 105 | 106 | # Mac 107 | .DS_Store 108 | 109 | # output logs 110 | tests/e2e/toy_examples/deepspeed/synchronous/output.txt 111 | 112 | # vim 113 | *.swp 114 | -------------------------------------------------------------------------------- /Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /data/kk/instruct/3ppl/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/3ppl/test.parquet -------------------------------------------------------------------------------- /data/kk/instruct/3ppl/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/3ppl/train.parquet -------------------------------------------------------------------------------- /data/kk/instruct/4ppl/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/4ppl/test.parquet -------------------------------------------------------------------------------- /data/kk/instruct/4ppl/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/4ppl/train.parquet -------------------------------------------------------------------------------- /data/kk/instruct/5ppl/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/5ppl/test.parquet -------------------------------------------------------------------------------- /data/kk/instruct/5ppl/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/5ppl/train.parquet -------------------------------------------------------------------------------- /data/kk/instruct/6ppl/test.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/6ppl/test.parquet -------------------------------------------------------------------------------- /data/kk/instruct/6ppl/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/6ppl/train.parquet -------------------------------------------------------------------------------- /data/kk/instruct/7ppl/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/7ppl/test.parquet -------------------------------------------------------------------------------- /data/kk/instruct/7ppl/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/7ppl/train.parquet -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:24.05-py3 2 | 3 | # uninstall nv-pytorch fork 4 | RUN pip3 uninstall pytorch-quantization \ 5 | pytorch-triton \ 6 | torch \ 7 | torch-tensorrt \ 8 | torchvision \ 9 | xgboost transformer_engine flash_attn \ 10 | apex megatron-core -y 11 | 12 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124 13 | 14 | # make sure torch version is kept 15 | RUN pip3 install --no-cache-dir \ 16 | "torch==2.4.0" \ 17 | accelerate \ 18 | codetiming \ 19 | datasets \ 20 | dill \ 21 | hydra-core \ 22 | numpy \ 23 | pybind11 \ 24 | tensordict \ 25 | "transformers<=4.46.0" 26 | 27 | # ray is installed via vllm 28 | RUN pip3 install --no-cache-dir vllm==0.6.3 29 | 30 | # we choose flash-attn v2.7.0 or v2.7.2 which contain pre-built wheels 31 | RUN pip3 install --no-cache-dir --no-build-isolation flash-attn==2.7.0.post2 32 | 33 | # install apex, set MAX_JOBS to avoid OOMs 34 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 35 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 36 | git+https://github.com/NVIDIA/apex 37 | 38 | # install Transformer Engine, which requires FA 2.5.8 39 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation 40 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 41 | 42 | # Pin wandb to v0.18 since v0.19.1 is released with ImportError 43 | RUN pip3 install wandb==0.18.7 py-spy 44 | -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE . 
2 | 3 | # the one in docker.io is an alias for the one veturbo 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN pip3 config unset global.index-url 10 | 11 | # transformers 4.47.0 contains the following bug: 12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask' 13 | RUN pip3 install --no-cache-dir \ 14 | torch==2.4.0 \ 15 | accelerate \ 16 | codetiming \ 17 | dill \ 18 | hydra-core \ 19 | numpy \ 20 | pybind11 \ 21 | tensordict \ 22 | "transformers <= 4.46.0" 23 | 24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation 25 | 26 | # vllm depends on ray, and veRL does not support ray > 2.37 27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10 28 | 29 | # install apex 30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 31 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 32 | git+https://github.com/NVIDIA/apex 33 | 34 | # install Transformer Engine 35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req 36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM 37 | # - cudnn is required by TransformerEngine 38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \ 39 | # pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation 41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # veRL documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. 
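The Dockerfiles above pin specific versions of the core dependencies (e.g. torch 2.4.0, vllm 0.6.3, flash-attn, transformers <= 4.46.0). A quick way to confirm that those pins resolved inside the built image is to print the installed versions. The snippet below is only an illustrative sketch and is not part of the repository; the package names are the PyPI distribution names assumed from the pins above.

```python
# Illustrative sanity check for the dependency pins in docker/Dockerfile.ngc.vllm
# and docker/Dockerfile.vemlp.vllm.te. Run inside the container after building.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("torch", "vllm", "flash-attn", "transformers", "ray"):
    try:
        # importlib.metadata looks packages up by their distribution name.
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```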
-------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- 1 | Add models with the Megatron-LM backend 2 | ========================================= 3 | 4 | Model 5 | ----------- 6 | 7 | The most challenging aspect of using the Megatron-LM backend is implementing 8 | the models for training. Currently, we implement the Llama model, which 9 | supports data parallelism, tensor parallelism, pipeline parallelism (including 10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) for the Llama 11 | model, which can be found in `modeling_llama_megatron.py `_. 12 | 13 | To support other models, users are required to implement: 14 | 15 | 1. A model similar to ``modeling_llama_megatron.py`` that satisfies the 16 | parallelism requirements of Megatron-LM. Then register your model in 17 | `registry.py `_. 18 | 2. Checkpoint utils that can load a full checkpoint (e.g. a HuggingFace 19 | checkpoint) into the partitioned models at runtime. Then register 20 | your loader in ``weight_loader_registry`` in `weight_loader_registry.py `_. 21 | 3. A weight loader that synchronizes the weights from the Megatron model to the rollout 22 | (vLLM) model. Note that both the actor model and the rollout model are 23 | partitioned at runtime, so it's advisable to keep the parameter names consistent 24 | in the actor model implementation. Otherwise, you may need an additional 25 | name mapping and even a weight transformation. The weight loader implementation 26 | is in `megatron_weight_loaders.py `_. -------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /docs/faq/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ==================================== 3 | 4 | Ray related 5 | ------------ 6 | 7 | How to add a breakpoint for debugging with distributed Ray? 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | Please check out the official debugging guide from Ray: https://docs.ray.io/en/latest/ray-observability/ray-distributed-debugger.html 11 | 12 | 13 | Distributed training 14 | ------------------------ 15 | 16 | How to run multi-node post-training with Ray?
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | You can start a Ray cluster and submit a Ray job, following the official guide from Ray: https://docs.ray.io/en/latest/ray-core/starting-ray.html 20 | -------------------------------------------------------------------------------- /docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- 1 | Implement Reward Function for Dataset 2 | ====================================== 3 | 4 | For each dataset, we need to implement a reward function or utilize a reward model to compute the rewards for the generated responses. 5 | We already pre-implemented some reward functions in the `reward_score directory `_. 6 | 7 | Currently, we support reward functions for the GSM8k and MATH datasets. For RLHF datasets (e.g., 8 | full_hh_rlhf) and Code Generation (e.g., APPS), we utilize a reward model 9 | and a SandBox (to be open-sourced soon) for evaluation, respectively. 10 | 11 | RewardManager 12 | ------------- 13 | 14 | In the entry point of the PPO post-training script `main_ppo.py `_, 15 | we implement a ``RewardManager`` that utilizes the pre-implemented reward functions to compute the score for each response. 16 | 17 | In the ``RewardManager``, we implement a ``__call__`` function to 18 | compute the score for each response. 19 | All the reward functions are executed by ``compute_score_fn``. 20 | The input is a ``DataProto``, which includes: 21 | 22 | - ``input_ids``, ``attention_mask``: ``input_ids`` and ``attention_mask`` after applying the 23 | chat template, including both prompt and response 24 | - ``responses``: response tokens 25 | - ``ground_truth``: The ground truth string of the current prompt. 26 | Stored in ``non_tensor_batch`` in the ``DataProto``, which should be 27 | preprocessed in the parquet files. 28 | - ``data_source``: The dataset name of the current prompt. Stored in 29 | ``non_tensor_batch`` in the ``DataProto``, which should be 30 | preprocessed in the parquet files. 31 | 32 | After detokenizing the responses, the response string and the ground 33 | truth string are passed to ``compute_score_fn`` to compute the 34 | score for each response. 35 | 36 | Reward Functions 37 | ---------------- 38 | We already pre-implemented some reward functions in the `reward_score directory `_. 39 | 40 | - In the `GSM8k example `_, we 41 | force the response to output the final answer after four ``#`` characters (``####``), then 42 | use string matching to compare with the ground truth. If the answer is completely 43 | correct, the score is 1; if only the format is correct, the score is 0.1; if 44 | the format is incorrect, the score is 0. A minimal sketch of this scoring rule follows this list. 45 | - In the `MATH example `_, we follow 46 | the implementation in the `lm-evaluation-harness repository `_.
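For concreteness, here is a minimal sketch of such a rule-based reward function. It only illustrates the GSM8k scoring rule described above; the function name, signature, and default scores are illustrative assumptions and may differ from the actual implementation in ``verl/utils/reward_score/gsm8k.py``.

.. code-block:: python

    import re

    def compute_score(solution_str: str, ground_truth: str,
                      format_score: float = 0.1, score: float = 1.0) -> float:
        """Rule-based GSM8k-style reward: extract the answer after '####' and string-match it."""
        match = re.search(r"####\s*(-?[0-9.,]+)", solution_str)
        if match is None:
            # No '#### <answer>' pattern found: the format is incorrect.
            return 0.0
        answer = match.group(1).replace(",", "").rstrip(".")
        if answer == ground_truth.replace(",", "").strip():
            # The extracted answer matches the ground truth string exactly.
            return score
        # The format is correct but the answer is wrong.
        return format_score

A ``RewardManager`` would then select such a function based on ``data_source`` and call it once per response with the detokenized response string and the ground truth stored in ``non_tensor_batch``.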
47 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown suport 2 | recommonmark 3 | # markdown table suport 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme -------------------------------------------------------------------------------- /eval_kk/eval.sh: -------------------------------------------------------------------------------- 1 | model="xxx" #model path 2 | config="vllm" 3 | num_limit=100 4 | max_token=8192 5 | ntrain=0 6 | split="test" 7 | log_path="log/stage1_1000step" 8 | 9 | mkdir -p ${log_path} 10 | 11 | for eval_nppl in 2 3 4 5 6 7 8; do 12 | log_file="${log_path}/${eval_nppl}.log" 13 | echo "Starting job for eval_nppl: $eval_nppl, logging to $log_file" 14 | 15 | CUDA_VISIBLE_DEVICES=$((eval_nppl - 1)) PYTHONUNBUFFERED=1 python main_eval_instruct.py --batch_size 8 --model ${model} --max_token ${max_token} \ 16 | --ntrain ${ntrain} --config ${config} --limit ${num_limit} --split ${split} --temperature 1.0 --top_p 1.0 \ 17 | --problem_type "clean" --eval_nppl ${eval_nppl} > "$log_file" 2>&1 & 18 | done & -------------------------------------------------------------------------------- /eval_kk/kk_prompt.py: -------------------------------------------------------------------------------- 1 | system_instruction='''Your task is to solve a logical reasoning problem. You are given set of statements from which you must logically deduce the identity of a set of characters. 2 | 3 | You must infer the identity of each character. First, explain your reasoning. At the end of your answer, you must clearly state the identity of each character by following the format: 4 | 5 | CONCLUSION: 6 | (1) ... 7 | (2) ... 8 | (3) ... 9 | ''' 10 | 11 | 12 | system_instruction_no_reason='''Your task is to solve a logical reasoning problem. You are given set of statements from which you must logically deduce the identity of a set of characters. 13 | 14 | You must infer the identity of each character. At the end of your answer, you must clearly state the identity of each character by following the format: 15 | 16 | CONCLUSION: 17 | (1) ... 18 | (2) ... 19 | (3) ... 20 | ''' 21 | 22 | demonstration_2char_no_reason='''### Question: A very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 2 inhabitants: Jack, and Sophia. Jack tells you that Sophia is not a knave. Sophia says that If Jack is a knight then Sophia is a knight. So who is a knight and who is a knave? 23 | ### Answer: 24 | CONCLUSION: 25 | (1) Jack is a knight 26 | (2) Sophia is a knight 27 | ''' 28 | 29 | 30 | 31 | demonstration_2char='''### Question: A very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 2 inhabitants: Ella, and Penelope. In a statement by Ella: \"Ella is a knight or Penelope is a knight\". According to Penelope, \"Ella is a knave if and only if Penelope is a knight\". So who is a knight and who is a knave? 32 | ### Answer: Let's think step by step, by considering whether each person is lying and if that leads to contradiction. Assume Ella is a knight. Penelope cannot be a knight, because this would contradict the claim of their own. Penelope cannot be a knave, because this would contradict the false claim of their own. 
We have exhausted all possibilities for Penelope, so let us go back and reconsider Ella. Assume Ella is a knave. Penelope cannot be a knight, because this would contradict the false claim of Ella. Assume Penelope is a knave. This leads to a feasible solution. 33 | CONCLUSION: 34 | (1) Ella is a knave 35 | (2) Penelope is a knave 36 | ''' 37 | 38 | 39 | -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_generation \ 2 | trainer.nnodes=1 \ 3 | trainer.n_gpus_per_node=8 \ 4 | data.path=~/data/rlhf/gsm8k/test.parquet \ 5 | data.prompt_key=prompt \ 6 | data.n_samples=1 \ 7 | data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \ 8 | model.path=deepseek-ai/deepseek-llm-7b-chat \ 9 | +model.trust_remote_code=True \ 10 | rollout.temperature=1.0 \ 11 | rollout.top_k=50 \ 12 | rollout.top_p=0.7 \ 13 | rollout.prompt_length=2048 \ 14 | rollout.response_length=1024 \ 15 | rollout.tensor_model_parallel_size=2 \ 16 | rollout.gpu_memory_utilization=0.8 17 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.val_batch_size=1312 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=1024 \ 11 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size=128 \ 16 | actor_rollout_ref.actor.use_kl_loss=True \ 17 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 18 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 19 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 20 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 23 | actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \ 24 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 25 | actor_rollout_ref.rollout.name=vllm \ 26 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 27 | actor_rollout_ref.rollout.n=5 \ 28 | actor_rollout_ref.ref.log_prob_micro_batch_size=256 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.val_batch_size=1312 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 16 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 25 | actor_rollout_ref.rollout.name=vllm \ 26 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 27 | actor_rollout_ref.rollout.n=5 \ 28 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 29 | algorithm.kl_ctrl.kl_coef=0.001 \ 30 | trainer.critic_warmup=0 \ 31 | trainer.logger=['console','wandb'] \ 32 | trainer.project_name='verl_grpo_example_gsm8k' \ 33 | trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \ 34 | trainer.n_gpus_per_node=8 \ 35 | trainer.nnodes=1 \ 36 | trainer.save_freq=-1 \ 37 | trainer.test_freq=5 \ 38 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.val_batch_size=1312 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=1024 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size=128 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=5 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size=256 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_grpo_example_gsm8k' \ 36 | trainer.experiment_name='qwen2_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 
| trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.val_batch_size=1312 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=1024 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 18 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 19 | actor_rollout_ref.actor.use_kl_loss=True \ 20 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 21 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 25 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=5 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \ 36 | +trainer.val_before_train=False \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.model.use_remove_padding=True \ 13 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 14 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 15 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 16 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=True \ 26 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | 
critic.ppo_micro_batch_size=32 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.grad_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.total_epochs=15 $@ 41 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.model.use_remove_padding=True \ 13 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 14 | actor_rollout_ref.actor.ppo_micro_batch_size=128 \ 15 | actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \ 16 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 19 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 24 | actor_rollout_ref.ref.log_prob_micro_batch_size=256 \ 25 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 26 | critic.optim.lr=1e-5 \ 27 | critic.ulysses_sequence_parallel_size=2 \ 28 | critic.model.use_remove_padding=True \ 29 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 30 | critic.model.enable_gradient_checkpointing=False \ 31 | critic.ppo_micro_batch_size=64 \ 32 | critic.model.fsdp_config.param_offload=False \ 33 | critic.model.fsdp_config.grad_offload=False \ 34 | critic.model.fsdp_config.optimizer_offload=False \ 35 | algorithm.kl_ctrl.kl_coef=0.001 \ 36 | trainer.critic_warmup=0 \ 37 | trainer.logger=['console','wandb'] \ 38 | trainer.project_name='verl_example_gsm8k' \ 39 | trainer.experiment_name='deepseek_llm_7b_function_rm_sp2' \ 40 | trainer.n_gpus_per_node=8 \ 41 | trainer.nnodes=1 \ 42 | trainer.save_freq=-1 \ 43 | trainer.test_freq=5 \ 44 | trainer.total_epochs=15 $@ 45 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use 5 | 6 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 7 | data.train_files="$train_files" \ 8 | data.val_files="$test_files" \ 9 | data.train_batch_size=512 \ 10 | data.val_batch_size=128 \ 11 | data.max_prompt_length=128 \ 12 | data.max_response_length=128 \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | 
actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \ 17 | actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \ 18 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 19 | actor_rollout_ref.rollout.name=vllm \ 20 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 21 | actor_rollout_ref.ref.log_prob_micro_batch_size=16 \ 22 | actor_rollout_ref.ref.param_offload=False \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size=16 \ 27 | reward_model.enable=True \ 28 | reward_model.megatron.tensor_model_parallel_size=4 \ 29 | reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \ 30 | reward_model.micro_batch_size=16 \ 31 | reward_model.param_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_megatron_full_hh_rlhf_examples' \ 36 | trainer.experiment_name='deepseek_llm_7b_model_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 | trainer.total_epochs=100 $@ 42 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 12 | data.train_files="$train_files" \ 13 | data.val_files="$test_files" \ 14 | data.train_batch_size=1024 \ 15 | data.val_batch_size=6312 \ 16 | data.max_prompt_length=1024 \ 17 | data.max_response_length=512 \ 18 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 19 | actor_rollout_ref.actor.optim.lr=1e-6 \ 20 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 26 | actor_rollout_ref.ref.log_prob_micro_batch_size=32 \ 27 | critic.optim.lr=1e-5 \ 28 | critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size=32 \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_megatron_math_gsm8k_examples' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=100 $@ 41 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo --config-path=./config 
--config-name='ppo_megatron_trainer'\ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 11 | actor_rollout_ref.actor.optim.lr=2e-6 \ 12 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 13 | actor_rollout_ref.actor.ppo_micro_batch_size=64 \ 14 | actor_rollout_ref.rollout.log_prob_micro_batch_size=64 \ 15 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 16 | actor_rollout_ref.rollout.name=vllm \ 17 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 18 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 19 | critic.optim.lr=2e-5 \ 20 | critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 21 | critic.model.enable_gradient_checkpointing=False \ 22 | critic.ppo_micro_batch_size=64 \ 23 | algorithm.kl_ctrl.kl_coef=0.001 \ 24 | trainer.critic_warmup=0 \ 25 | trainer.logger=['console','wandb'] \ 26 | trainer.project_name='verl_megatron_gsm8k_examples' \ 27 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 28 | trainer.n_gpus_per_node=8 \ 29 | trainer.nnodes=1 \ 30 | trainer.save_freq=-1 \ 31 | trainer.total_epochs=15 \ 32 | +trainer.val_before_train=False $@ 33 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=512 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=1024 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=google/gemma-2-2b-it \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.model.use_remove_padding=True \ 13 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 14 | actor_rollout_ref.actor.ppo_micro_batch_size=4 \ 15 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 16 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size=4 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size=4 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=True \ 26 | critic.model.path=google/gemma-2-2b-it \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.grad_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_example' \ 36 | trainer.experiment_name='gemma2b_function_rm' \ 37 | trainer.n_gpus_per_node=2 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=10 \ 41 | trainer.total_epochs=15 $@ 42 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | python3 -m verl.trainer.main_ppo \ 12 | data.train_files="$train_files" \ 13 | data.val_files="$test_files" \ 14 | data.train_batch_size=1024 \ 15 | data.val_batch_size=6312 \ 16 | data.max_prompt_length=1024 \ 17 | data.max_response_length=512 \ 18 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 19 | actor_rollout_ref.actor.optim.lr=1e-6 \ 20 | actor_rollout_ref.model.use_remove_padding=True \ 21 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 22 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 25 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 26 | actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \ 27 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 28 | actor_rollout_ref.rollout.name=vllm \ 29 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size=16 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | critic.optim.lr=1e-5 \ 33 | critic.model.use_remove_padding=True \ 34 | critic.model.path=Qwen/Qwen2-7B-Instruct \ 35 | critic.model.enable_gradient_checkpointing=False \ 36 | critic.ppo_micro_batch_size=16 \ 37 | critic.model.fsdp_config.param_offload=False \ 38 | critic.model.fsdp_config.grad_offload=False \ 39 | critic.model.fsdp_config.optimizer_offload=False \ 40 | algorithm.kl_ctrl.kl_coef=0.001 \ 41 | trainer.critic_warmup=0 \ 42 | trainer.logger=['console','wandb'] \ 43 | trainer.project_name='verl_example' \ 44 | trainer.experiment_name='Qwen2-7B-Instruct_function_rm' \ 45 | trainer.n_gpus_per_node=8 \ 46 | trainer.nnodes=1 \ 47 | trainer.save_freq=-1 \ 48 | trainer.test_freq=10 \ 49 | trainer.total_epochs=15 $@ 50 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | # Discliamer: the model used in the script is only for academic example, 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues 12 | 13 | python3 -m verl.trainer.main_ppo \ 14 | data.train_files="$train_files" \ 15 | data.val_files="$test_files" \ 16 | data.train_batch_size=1024 \ 17 | data.val_batch_size=6312 \ 18 | data.max_prompt_length=1024 \ 19 | data.max_response_length=512 \ 20 | data.return_raw_chat=True \ 21 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 22 | actor_rollout_ref.actor.optim.lr=1e-6 \ 23 | actor_rollout_ref.model.use_remove_padding=True \ 24 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 25 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 26 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \ 
27 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 28 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 29 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 30 | actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \ 31 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 32 | actor_rollout_ref.rollout.name=vllm \ 33 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 34 | actor_rollout_ref.ref.log_prob_micro_batch_size=16 \ 35 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 36 | critic.optim.lr=1e-5 \ 37 | critic.model.use_remove_padding=True \ 38 | critic.optim.lr_warmup_steps_ratio=0.05 \ 39 | critic.model.path=Qwen/Qwen2-7B-Instruct \ 40 | critic.model.enable_gradient_checkpointing=False \ 41 | critic.ppo_micro_batch_size=16 \ 42 | critic.model.fsdp_config.param_offload=False \ 43 | critic.model.fsdp_config.grad_offload=False \ 44 | critic.model.fsdp_config.optimizer_offload=False \ 45 | reward_model.enable=True \ 46 | reward_model.model.path=sfairXC/FsfairX-LLaMA3-RM-v0.1\ 47 | reward_model.model.use_remove_padding=True \ 48 | reward_model.model.fsdp_config.param_offload=True \ 49 | reward_model.micro_batch_size=16 \ 50 | algorithm.kl_ctrl.kl_coef=0.001 \ 51 | trainer.critic_warmup=0 \ 52 | trainer.logger=['console','wandb'] \ 53 | trainer.project_name='verl_example' \ 54 | trainer.experiment_name='Qwen2-7B-Instruct_hybrid_rm' \ 55 | trainer.n_gpus_per_node=8 \ 56 | trainer.nnodes=1 \ 57 | trainer.save_freq=-1 \ 58 | trainer.test_freq=5 \ 59 | trainer.total_epochs=15 $@ 60 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | python3 -m verl.trainer.main_ppo \ 12 | data.train_files="$train_files" \ 13 | data.val_files="$test_files" \ 14 | data.train_batch_size=4096 \ 15 | data.val_batch_size=1312 \ 16 | data.max_prompt_length=4096 \ 17 | data.max_response_length=4096 \ 18 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 19 | actor_rollout_ref.actor.optim.lr=1e-6 \ 20 | actor_rollout_ref.model.use_remove_padding=True \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.ppo_mini_batch_size=512 \ 23 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 24 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 25 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 26 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 27 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 31 | actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=24000 \ 32 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 33 | actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=24000 \ 34 | critic.optim.lr=1e-5 \ 35 | critic.model.use_remove_padding=True \ 36 | critic.model.path=Qwen/Qwen2-7B-Instruct \ 37 | critic.model.enable_gradient_checkpointing=True \ 38 | critic.ppo_max_token_len_per_gpu=98304 \ 39 | 
critic.model.fsdp_config.param_offload=False \ 40 | critic.model.fsdp_config.grad_offload=False \ 41 | critic.model.fsdp_config.optimizer_offload=False \ 42 | algorithm.kl_ctrl.kl_coef=0.001 \ 43 | trainer.critic_warmup=0 \ 44 | trainer.logger=['console','wandb'] \ 45 | trainer.project_name='verl_example_gsm8k' \ 46 | trainer.experiment_name='qwen2-7b_function_rm_bsz8k_p4k_r4k_seq_packing' \ 47 | trainer.n_gpus_per_node=8 \ 48 | +trainer.val_before_train=False \ 49 | trainer.nnodes=1 \ 50 | trainer.save_freq=-1 \ 51 | trainer.test_freq=5 \ 52 | trainer.total_epochs=15 $@ 53 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | python3 -m verl.trainer.main_ppo \ 12 | data.train_files="$train_files" \ 13 | data.val_files="$test_files" \ 14 | data.train_batch_size=1024 \ 15 | data.val_batch_size=6304 \ 16 | data.max_prompt_length=1024 \ 17 | data.max_response_length=1024 \ 18 | actor_rollout_ref.model.path=Qwen/Qwen2.5-32B-Instruct \ 19 | actor_rollout_ref.model.enable_gradient_checkpointing=False \ 20 | actor_rollout_ref.actor.optim.lr=1e-6 \ 21 | actor_rollout_ref.model.use_remove_padding=True \ 22 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 23 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \ 24 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 25 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 26 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 27 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ 31 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 32 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 33 | critic.optim.lr=1e-5 \ 34 | critic.model.use_remove_padding=True \ 35 | critic.model.path=Qwen/Qwen2.5-32B-Instruct \ 36 | critic.model.enable_gradient_checkpointing=False \ 37 | critic.ppo_micro_batch_size=32 \ 38 | critic.model.fsdp_config.param_offload=False \ 39 | critic.model.fsdp_config.grad_offload=False \ 40 | critic.model.fsdp_config.optimizer_offload=False \ 41 | algorithm.kl_ctrl.kl_coef=0.0001 \ 42 | trainer.critic_warmup=0 \ 43 | trainer.logger=['console','wandb'] \ 44 | trainer.project_name='verl_example' \ 45 | trainer.experiment_name='Qwen2.5-32B-Instruct_function_rm' \ 46 | trainer.n_gpus_per_node=8 \ 47 | trainer.nnodes=4 \ 48 | trainer.save_freq=-1 \ 49 | trainer.test_freq=10 \ 50 | trainer.total_epochs=15 $@ 51 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace to your own hdfs/local path 4 | 5 | nproc_per_node=$1 6 | 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 8 | -m verl.trainer.fsdp_sft_trainer \ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | 
data.val_files=$HOME/data/gsm8k/test.parquet \
11 | data.prompt_key=prompt \
12 | data.response_key=answer \
13 | data.micro_batch_size=8 \
14 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
15 | trainer.default_hdfs_dir=$hdfs_path \
16 | trainer.project_name=gsm8k-sft \
17 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
18 | trainer.total_epochs=4 \
19 | trainer.logger=['console','wandb']
--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_2b.sh:
--------------------------------------------------------------------------------
1 | # Tested with 2 & 4 GPUs
2 | 
3 | set -x
4 | 
5 | if [ "$#" -lt 2 ]; then
6 | echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]"
7 | exit 1
8 | fi
9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so $@ refers to the rest
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
17 | -m verl.trainer.fsdp_sft_trainer \
18 | data.train_files=$HOME/data/gsm8k/train.parquet \
19 | data.val_files=$HOME/data/gsm8k/test.parquet \
20 | data.prompt_key=extra_info \
21 | data.response_key=extra_info \
22 | +data.prompt_dict_keys=['question'] \
23 | +data.response_dict_keys=['answer'] \
24 | data.micro_batch_size=8 \
25 | model.partial_pretrain=google/gemma-2b-it \
26 | trainer.default_local_dir=$save_path \
27 | trainer.project_name=gsm8k-sft \
28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \
29 | trainer.total_epochs=2 \
30 | trainer.logger=['console','wandb'] \
31 | trainer.default_hdfs_dir=null $@
--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
1 | set -x
2 | 
3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace with your own hdfs/local path
4 | 
5 | nproc_per_node=$1
6 | 
7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
8 | -m verl.trainer.fsdp_sft_trainer \
9 | data.train_files=$HOME/data/gsm8k/train.parquet \
10 | data.val_files=$HOME/data/gsm8k/test.parquet \
11 | data.prompt_key=prompt \
12 | data.response_key=answer \
13 | data.micro_batch_size=8 \
14 | model.partial_pretrain=google/gemma-1.1-7b-it \
15 | trainer.default_hdfs_dir=$hdfs_path \
16 | trainer.project_name=gsm8k-sft \
17 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
18 | trainer.total_epochs=4 \
19 | trainer.logger=['console','wandb']
--------------------------------------------------------------------------------
/examples/split_placement/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
1 | set -x
2 | 
3 | python3 main_ppo_split.py \
4 | data.train_files=$HOME/data/gsm8k/train.parquet \
5 | data.val_files=$HOME/data/gsm8k/test.parquet \
6 | data.train_batch_size=1024 \
7 | data.val_batch_size=1312 \
8 | data.max_prompt_length=512 \
9 | data.max_response_length=512 \
10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
11 | actor_rollout_ref.actor.optim.lr=1e-6 \
12 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \
13 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \
14 | actor_rollout_ref.actor.fsdp_config.param_offload=False \
15 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \
16 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
17 | actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \
18 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
19 | actor_rollout_ref.rollout.name=vllm \ 20 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 21 | actor_rollout_ref.ref.log_prob_micro_batch_size=32 \ 22 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size=16 \ 27 | critic.model.fsdp_config.param_offload=False \ 28 | critic.model.fsdp_config.grad_offload=False \ 29 | critic.model.fsdp_config.optimizer_offload=False \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.total_epochs=15 $@ 39 | -------------------------------------------------------------------------------- /main_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | MODEL_PATH=xxx 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | python3 -m verl.trainer.main_ppo \ 5 | algorithm.adv_estimator=grpo \ 6 | data.train_files=data/xxx \ 7 | data.val_files=dataxxx \ 8 | data.train_batch_size=64 \ 9 | data.val_batch_size=32 \ 10 | data.max_prompt_length=400 \ 11 | data.max_response_length=2048 \ 12 | actor_rollout_ref.model.path=$MODEL_PATH\ 13 | actor_rollout_ref.actor.optim.lr=3e-7 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=32 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size=16 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 22 | actor_rollout_ref.actor.fsdp_config.grad_offload=True \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=16 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['wandb'] \ 34 | trainer.project_name='GRPO_logic_KK' \ 35 | trainer.experiment_name='Qwen-7B' \ 36 | trainer.n_gpus_per_node=4 \ 37 | trainer.nnodes=1 \ 38 | trainer.default_local_dir=xxx \ 39 | trainer.default_hdfs_dir=null \ 40 | trainer.save_freq=10 \ 41 | trainer.test_freq=10 \ 42 | trainer.total_epochs=5 $@ 2>&1 | tee grpo.log 43 | -------------------------------------------------------------------------------- /math_eval/auto_test_aime.sh: -------------------------------------------------------------------------------- 1 | CHECKPOINT_PATH="$1"/actor 2 | mkdir -p /volume/ailab4sci/ztgao/log/"$2" 3 | mkdir -p /volume/ailab4sci/ztgao/aime/"$2" 4 | 5 | if [ ! 
-d "$CHECKPOINT_PATH" ]; then
6 | echo "Invalid path provided: $CHECKPOINT_PATH"
7 | exit 1
8 | fi
9 | 
10 | CHECKPOINTS=($(find "$CHECKPOINT_PATH" -mindepth 1 -maxdepth 1 -type d))
11 | 
12 | declare -A GPU_LAST_USED_TIME
13 | declare -a CHECKPOINT_QUEUE
14 | 
15 | check_gpu_free() {
16 | local gpu_id=$1
17 | local pid=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader,nounits -i "$gpu_id")
18 | local current_time=$(date +%s)
19 | 
20 | if [ -n "$pid" ]; then
21 | GPU_LAST_USED_TIME["$gpu_id"]=$current_time
22 | return 1
23 | fi
24 | 
25 | if [ -z "${GPU_LAST_USED_TIME["$gpu_id"]}" ] || [ $((current_time - GPU_LAST_USED_TIME["$gpu_id"])) -gt 600 ]; then
26 | GPU_LAST_USED_TIME["$gpu_id"]=$current_time
27 | return 0
28 | else
29 | return 1
30 | fi
31 | }
32 | 
33 | for CHECKPOINT in "${CHECKPOINTS[@]}"; do
34 | step=$(basename "$CHECKPOINT" | sed -E 's/[^0-9]*([0-9]+)$/\1/')
35 | log_path="/volume/ailab4sci/ztgao/log/$2/$step.log"
36 | 
37 | if [ ! -f "$log_path" ]; then
38 | CHECKPOINT_QUEUE+=("$CHECKPOINT")
39 | echo "Added checkpoint $CHECKPOINT to the queue."
40 | else
41 | echo "Log file already exists: $log_path, skipping this model."
42 | fi
43 | done
44 | 
45 | while [ ${#CHECKPOINT_QUEUE[@]} -gt 0 ]; do
46 | CHECKPOINT=${CHECKPOINT_QUEUE[0]}
47 | step=$(basename "$CHECKPOINT" | sed -E 's/[^0-9]*([0-9]+)$/\1/')
48 | log_path="/volume/ailab4sci/ztgao/log/$2/$step.log"
49 | 
50 | GPU_ID=""
51 | for i in $(seq 0 7); do
52 | if check_gpu_free "$i"; then
53 | GPU_ID=$i
54 | break
55 | fi
56 | done
57 | 
58 | if [ -z "$GPU_ID" ]; then
59 | echo "No free GPU available, retrying in 30 seconds..."
60 | sleep 30
61 | continue
62 | fi
63 | 
64 | json_path="/volume/ailab4sci/ztgao/aime/$2/$step.json"
65 | mkdir -p "$(dirname "$log_path")"
66 | echo "Using GPU $GPU_ID to process model: $CHECKPOINT, log: $log_path"
67 | CUDA_VISIBLE_DEVICES=$GPU_ID python3 test_aime.py --model_path "$CHECKPOINT" --json_path "$json_path" > "$log_path" 2>&1 &
68 | CHECKPOINT_QUEUE=("${CHECKPOINT_QUEUE[@]:1}")
69 | sleep 2
70 | done
71 | 
72 | wait
73 | echo "All evaluation tasks have completed."
74 | 
--------------------------------------------------------------------------------
/math_eval/test_aime.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 120 > log/aime/1/120.log 2>&1 &
2 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 180 > log/aime/1/180.log 2>&1 &
3 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 300 > log/aime/1/300.log 2>&1 &
4 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 360 > log/aime/1/360.log 2>&1 &
5 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 420 > log/aime/1/420.log 2>&1 &
6 | CUDA_VISIBLE_DEVICES=5 python test_aime.py --stage 1 --step 480 > log/aime/1/480.log 2>&1 &
7 | CUDA_VISIBLE_DEVICES=6 python test_aime.py --stage 1 --step 600 > log/aime/1/600.log 2>&1 &
8 | CUDA_VISIBLE_DEVICES=7 python test_aime.py --stage 1 --step 660 > log/aime/1/660.log 2>&1 &
9 | 
10 | wait
11 | 
12 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 720 > log/aime/1/720.log 2>&1 &
13 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 780 > log/aime/1/780.log 2>&1 &
14 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 900 > log/aime/1/900.log 2>&1 &
15 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 960 > log/aime/1/960.log 2>&1 &
16 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 1020 > log/aime/1/1020.log 2>&1 &
17 | CUDA_VISIBLE_DEVICES=5 python test_aime.py --stage 1 --step 1080 > log/aime/1/1080.log 2>&1 &
18 | 
CUDA_VISIBLE_DEVICES=6 python test_aime.py --stage 1 --step 1320 > log/aime/1/1320.log 2>&1 & 19 | CUDA_VISIBLE_DEVICES=7 python test_aime.py --stage 1 --step 1380 > log/aime/1/1380.log 2>&1 & 20 | 21 | wait 22 | 23 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 1440 > log/aime/1/1440.log 2>&1 & 24 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 1560 > log/aime/1/1560.log 2>&1 & 25 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 1620 > log/aime/1/1620.log 2>&1 & 26 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 1680 > log/aime/1/1680.log 2>&1 & 27 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 1740 > log/aime/1/1740.log 2>&1 & -------------------------------------------------------------------------------- /math_eval/test_amc.sh: -------------------------------------------------------------------------------- 1 | # CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 120 > log/amc/1/120.log 2>&1 & 2 | # CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 180 > log/amc/1/180.log 2>&1 & 3 | # CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 300 > log/amc/1/300.log 2>&1 & 4 | # CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 360 > log/amc/1/360.log 2>&1 & 5 | # CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 420 > log/amc/1/420.log 2>&1 & 6 | # CUDA_VISIBLE_DEVICES=5 python test_amc.py --stage 1 --step 480 > log/amc/1/480.log 2>&1 & 7 | # CUDA_VISIBLE_DEVICES=6 python test_amc.py --stage 1 --step 600 > log/amc/1/600.log 2>&1 & 8 | # CUDA_VISIBLE_DEVICES=7 python test_amc.py --stage 1 --step 660 > log/amc/1/660.log 2>&1 & 9 | 10 | # wait 11 | 12 | CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 720 > log/amc/1/720.log 2>&1 & 13 | CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 780 > log/amc/1/780.log 2>&1 & 14 | CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 900 > log/amc/1/900.log 2>&1 & 15 | CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 960 > log/amc/1/960.log 2>&1 & 16 | CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 1020 > log/amc/1/1020.log 2>&1 & 17 | CUDA_VISIBLE_DEVICES=5 python test_amc.py --stage 1 --step 1080 > log/amc/1/1080.log 2>&1 & 18 | CUDA_VISIBLE_DEVICES=6 python test_amc.py --stage 1 --step 1320 > log/amc/1/1320.log 2>&1 & 19 | CUDA_VISIBLE_DEVICES=7 python test_amc.py --stage 1 --step 1380 > log/amc/1/1380.log 2>&1 & 20 | 21 | # wait 22 | 23 | # CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 1440 > log/amc/1/1440.log 2>&1 & 24 | # CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 1560 > log/amc/1/1560.log 2>&1 & 25 | # CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 1620 > log/amc/1/1620.log 2>&1 & 26 | # CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 1680 > log/amc/1/1680.log 2>&1 & 27 | # CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 1740 > log/amc/1/1740.log 2>&1 & -------------------------------------------------------------------------------- /pics/response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response.png -------------------------------------------------------------------------------- /pics/response_mean_length.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response_mean_length.png -------------------------------------------------------------------------------- /pics/response_mean_length_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response_mean_length_v2.png -------------------------------------------------------------------------------- /pics/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/teaser.png -------------------------------------------------------------------------------- /pics/test_score_plot_v1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/test_score_plot_v1.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # ------------------------------- 2 | # build-system 3 | # ------------------------------- 4 | [build-system] 5 | requires = [ 6 | "setuptools>=61.0", 7 | "wheel" 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | 11 | # ------------------------------- 12 | # project (PEP 621 metadata) 13 | # ------------------------------- 14 | [project] 15 | name = "verl" 16 | # We'll mark the version as "dynamic" because it's read from the file "verl/version/version" 17 | # (PEP 621 calls this "dynamic version"). 18 | # The actual version is specified in the [tool.setuptools.dynamic] section below. 19 | dynamic = ["version"] 20 | 21 | description = "veRL: Volcano Engine Reinforcement Learning for LLM" 22 | license = {file = "LICENSE"} # or "Apache-2.0", if you prefer an SPDX identifier 23 | readme = {file = "README.md", content-type = "text/markdown"} 24 | requires-python = ">=3.8" 25 | 26 | authors = [ 27 | { name = "Bytedance - Seed - MLSys", email = "zhangchi.usc1992@bytedance.com" }, 28 | { name = "Bytedance - Seed - MLSys", email = "gmsheng@connect.hku.hk" }, 29 | ] 30 | 31 | # Dependencies corresponding to install_requires in setup.py 32 | dependencies = [ 33 | "accelerate", 34 | "codetiming", 35 | "datasets", 36 | "dill", 37 | "hydra-core", 38 | "numpy", 39 | "pybind11", 40 | "ray", 41 | "tensordict", 42 | "transformers<4.48", 43 | "vllm<=0.6.3", 44 | ] 45 | 46 | # Optional dependencies (extras_require in setup.py) 47 | [project.optional-dependencies] 48 | test = [ 49 | "pytest", "yapf" 50 | ] 51 | 52 | # URLs 53 | [project.urls] 54 | Homepage = "https://github.com/volcengine/verl" 55 | 56 | # ------------------------------- 57 | # tool.setuptools - Additional config 58 | # ------------------------------- 59 | [tool.setuptools] 60 | # True means `setuptools` will attempt to include all relevant files in package_data automatically. 61 | # This corresponds to `include_package_data=True` in setup.py. 62 | include-package-data = true 63 | 64 | # We read the version from a file in 'verl/version/version' 65 | [tool.setuptools.dynamic] 66 | version = {file = "verl/version/version"} 67 | 68 | # If you need to mimic `package_dir={'': '.'}`: 69 | [tool.setuptools.package-dir] 70 | "" = "." 
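# A quick, illustrative way to check that the dynamic version above is picked up
# (a minimal sketch; it assumes the package has already been installed, e.g. via
# `pip install -e .`):
#
#   python -c "from importlib.metadata import version; print(version('verl'))"
#
# The printed value should match the contents of verl/version/version.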
71 | 72 | # If you need to include specific non-Python data (like YAML files or version file): 73 | # This is the rough equivalent of package_data={'': ['version/*'], 'verl': ['trainer/config/*.yaml']} 74 | [tool.setuptools.package-data] 75 | verl = [ 76 | "version/*", 77 | "trainer/config/*.yaml" 78 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | codetiming 3 | datasets 4 | dill 5 | flash-attn 6 | hydra-core 7 | numpy 8 | pandas 9 | pybind11 10 | ray 11 | tensordict<0.6 12 | transformers<4.48 13 | vllm==0.6.3 14 | wandb 15 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | yapf -ir -vv --style ./.style.yapf verl tests single_controller examples -------------------------------------------------------------------------------- /scripts/train_grpo_4gpu_7Binstruct.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/train.parquet \ 8 | data.val_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/test.parquet \ 9 | data.train_batch_size=16 \ 10 | data.val_batch_size=16 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=2048 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct-1M \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size=64 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=8 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size=160 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['wandb'] \ 35 | trainer.project_name='verl_grpo_example_gsm8k' \ 36 | trainer.experiment_name='qwen2_7b_function_rm' \ 37 | trainer.n_gpus_per_node=4 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=20 \ 41 | trainer.total_epochs=1 $@ 42 | -------------------------------------------------------------------------------- /scripts/train_ppo_3B_4gpu.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_ppo \ 2 | data.train_files=$DATA_DIR/train.parquet \ 3 | data.val_files=$DATA_DIR/test.parquet \ 4 | data.train_batch_size=8 \ 5 | data.val_batch_size=8 \ 6 | data.max_prompt_length=300 \ 7 | data.max_response_length=1024 \ 8 | actor_rollout_ref.model.path=$BASE_MODEL \ 9 | 
actor_rollout_ref.actor.optim.lr=5e-6 \ 10 | actor_rollout_ref.actor.ppo_mini_batch_size=32 \ 11 | actor_rollout_ref.actor.ppo_micro_batch_size=4 \ 12 | actor_rollout_ref.rollout.log_prob_micro_batch_size=40 \ 13 | actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \ 14 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 15 | actor_rollout_ref.ref.log_prob_micro_batch_size=40 \ 16 | actor_rollout_ref.rollout.temperature=0.6 \ 17 | actor_rollout_ref.rollout.top_k=-1 \ 18 | actor_rollout_ref.rollout.top_p=0.8 \ 19 | actor_rollout_ref.rollout.n=4 \ 20 | critic.optim.lr=1e-6 \ 21 | critic.model.path=$BASE_MODEL \ 22 | critic.ppo_micro_batch_size=32 \ 23 | algorithm.kl_ctrl.kl_coef=0.001 \ 24 | trainer.logger=['wandb'] \ 25 | +trainer.val_before_train=False \ 26 | trainer.default_hdfs_dir=null \ 27 | trainer.n_gpus_per_node=$N_GPUS \ 28 | trainer.nnodes=1 \ 29 | trainer.save_freq=200 \ 30 | trainer.test_freq=20 \ 31 | trainer.project_name=KK \ 32 | trainer.experiment_name=$EXPERIMENT_NAME \ 33 | trainer.total_epochs=1 2>&1 | tee verl_demo.log 34 | -------------------------------------------------------------------------------- /scripts/train_ppo_7B_4gpu.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_ppo \ 2 | data.train_files=$DATA_DIR/train.parquet \ 3 | data.val_files=$DATA_DIR/test.parquet \ 4 | data.train_batch_size=4 \ 5 | data.val_batch_size=4 \ 6 | data.max_prompt_length=512 \ 7 | data.max_response_length=2048 \ 8 | actor_rollout_ref.model.path=$BASE_MODEL \ 9 | actor_rollout_ref.actor.optim.lr=1e-6 \ 10 | actor_rollout_ref.model.use_remove_padding=True \ 11 | actor_rollout_ref.actor.ppo_mini_batch_size=4 \ 12 | actor_rollout_ref.actor.ppo_micro_batch_size=4 \ 13 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 14 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 15 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 16 | actor_rollout_ref.rollout.temperature=0.6 \ 17 | actor_rollout_ref.rollout.top_k=1 \ 18 | actor_rollout_ref.rollout.do_sample=True \ 19 | actor_rollout_ref.rollout.n=2 \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size=4 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.2 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size=4 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=$BASE_MODEL \ 28 | critic.ppo_micro_batch_size=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.grad_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.logger=['wandb'] \ 34 | +trainer.val_before_train=False \ 35 | trainer.default_hdfs_dir=null \ 36 | trainer.n_gpus_per_node=$N_GPUS \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=100 \ 39 | trainer.test_freq=20 \ 40 | trainer.project_name=KK_logic \ 41 | trainer.experiment_name=$EXPERIMENT_NAME \ 42 | trainer.total_epochs=1 2>&1 | tee verl_demo.log -------------------------------------------------------------------------------- /scripts/train_reinforce_plus_4gpu_7Binstruct.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=reinforce_plus_plus \ 7 | 
data.train_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/train.parquet \ 8 | data.val_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/test.parquet \ 9 | data.train_batch_size=16 \ 10 | data.val_batch_size=16 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=2048 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct-1M \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size=64 \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 20 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 23 | actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \ 24 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 25 | actor_rollout_ref.rollout.name=vllm \ 26 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 27 | actor_rollout_ref.rollout.n=8 \ 28 | actor_rollout_ref.ref.log_prob_micro_batch_size=160 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['wandb'] \ 33 | trainer.project_name='GRPO_logic_KK' \ 34 | trainer.experiment_name='RF++-Qwen-7B-1M-3ppl-001' \ 35 | trainer.n_gpus_per_node=4 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=10 \ 39 | trainer.total_epochs=1 $@ 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # setup.py is the fallback installation script when pyproject.toml does not work 16 | from setuptools import setup, find_packages 17 | import os 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | with open(os.path.join(version_folder, 'verl/version/version')) as f: 22 | __version__ = f.read().strip() 23 | 24 | 25 | with open('requirements.txt') as f: 26 | required = f.read().splitlines() 27 | install_requires = [item.strip() for item in required if item.strip()[0] != '#'] 28 | 29 | extras_require = { 30 | 'test': ['pytest', 'yapf'] 31 | } 32 | 33 | from pathlib import Path 34 | this_directory = Path(__file__).parent 35 | long_description = (this_directory / "README.md").read_text() 36 | 37 | setup( 38 | name='verl', 39 | version=__version__, 40 | package_dir={'': '.'}, 41 | packages=find_packages(where='.'), 42 | url='https://github.com/volcengine/verl', 43 | license='Apache 2.0', 44 | author='Bytedance - Seed - MLSys', 45 | author_email='zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk', 46 | description='veRL: Volcano Engine Reinforcement Learning for LLM', 47 | install_requires=install_requires, 48 | extras_require=extras_require, 49 | package_data={'': ['version/*'], 50 | 'verl': ['trainer/config/*.yaml'],}, 51 | include_package_data=True, 52 | long_description=long_description, 53 | long_description_content_type='text/markdown' 54 | ) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from tests.e2e.envs.digit_completion import DigitCompletion, generate_ground_truth_response 16 | from torch.utils import data 17 | import os 18 | 19 | if __name__ == '__main__': 20 | simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9) 21 | all_prompts = simple_task.get_all_prompts() 22 | 23 | # 21 * 6 * 4 24 | train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2]) 25 | train_data = list(train_data) 26 | test_data = list(test_data) 27 | 28 | train_data = [[{'role': 'user', 'content': str(item)}] \ 29 | for item in train_data] 30 | test_data = [[{'role': 'user', 'content': str(item)}] \ 31 | for item in test_data] 32 | 33 | print(f'Size of train: {len(train_data)}, size of test: {len(test_data)}') 34 | 35 | train_data = {'prompt': train_data} 36 | test_data = {'prompt': test_data} 37 | 38 | model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__))) 39 | 40 | import pandas as pd 41 | 42 | train_data_frame = pd.DataFrame(train_data) 43 | test_data_frame = pd.DataFrame(test_data) 44 | 45 | train_data_frame.to_parquet(os.path.join(model_folder, 'train.parquet')) 46 | test_data_frame.to_parquet(os.path.join(model_folder, 'test.parquet')) 47 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/data/test.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/data/train.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: 
--------------------------------------------------------------------------------
1 | {
2 | "_from_model_config": true,
3 | "eos_token_id": 1,
4 | "pad_token_id": 2,
5 | "transformers_version": "4.43.3"
6 | }
7 | 
--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/model/model.safetensors
--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "char_ords": [
3 | 48,
4 | 49,
5 | 50,
6 | 51,
7 | 52,
8 | 53,
9 | 54,
10 | 55,
11 | 56,
12 | 57,
13 | 44,
14 | 58
15 | ],
16 | "model_max_length": 2048,
17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}"
18 | }
--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
1 | # Digit completion
2 | 
3 | This is an example of solving a digit completion problem. The problem is defined as follows:
4 | 
5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers.
6 | If the max number is exceeded, the sequence wraps around modulo (max_number + 1).
7 | 
8 | For example,
9 | - prompt = [1, 2, 3]
10 | - N = 5
11 | - max_number = 6
12 | 
13 | The response should be [4, 5, 6, 7%7, 8%7] = [4, 5, 6, 0, 1].
14 | 
15 | # Environment definition
16 | 
17 | The core task is defined in tests/e2e/envs/digit_completion/task.py
18 | 
19 | It is highly recommended to take a look at it for better understanding.
20 | 
21 | 
22 | 
23 | # Run experiments
24 | 
25 | Users are required to specify the config path and config name (and the model config path relative to the current working directory)
26 | 
27 | ```bash
28 | # cd examples/arithmetic_sequence/rl
29 | 
30 | # Specify the config path and config name (current working dir)
31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron'
32 | 
33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using:
34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config
35 | 
36 | ```
37 | 
38 | 
--------------------------------------------------------------------------------
/tests/e2e/check_results.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | import numpy as np 18 | 19 | 20 | def extract_reward_from_line(line): 21 | # TODO: this function needs error handling 22 | try: 23 | key_vals = line.split(' - ') 24 | for key_val in key_vals: 25 | key, val = key_val.split(':') 26 | if key == 'critic/rewards/mean': 27 | reward = float(val) 28 | return reward 29 | return -np.inf 30 | except Exception: 31 | return -np.inf 32 | 33 | 34 | if __name__ == '__main__': 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('--output_file', required=True, type=str) 37 | 38 | args = parser.parse_args() 39 | 40 | with open(args.output_file, 'r') as f: 41 | output = f.read().split('\n') 42 | 43 | best_reward = -np.inf 44 | for line in output: 45 | if line.startswith('step'): 46 | reward = extract_reward_from_line(line) 47 | if reward > best_reward: 48 | best_reward = reward 49 | 50 | print(f'Best reward is {best_reward}') 51 | assert best_reward > 0.2, f'Best reward must be greater than 0.2. best_reward: {best_reward}' 52 | print('Check passes') 53 | -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
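# A minimal sanity check of the wrap-around rule described in the arithmetic_sequence
# README above (a sketch assuming values wrap modulo max_number + 1): for
# prompt = [1, 2, 3], N = 5 and max_number = 6, the expected completion is
#
#   >>> [x % (6 + 1) for x in range(4, 9)]
#   [4, 5, 6, 0, 1]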
14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 19 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 25 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 26 | critic.optim.lr=1e-5 \ 27 | critic.model.use_remove_padding=True \ 28 | critic.model.path=Qwen/Qwen2.5-0.5B \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size=32 \ 31 | critic.model.fsdp_config.param_offload=False \ 32 | critic.model.fsdp_config.grad_offload=False \ 33 | critic.model.fsdp_config.optimizer_offload=False \ 34 | algorithm.kl_ctrl.kl_coef=0.001 \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console'] \ 37 | trainer.project_name='verl_example_gsm8k' \ 38 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=1 \ 41 | trainer.save_freq=-1 \ 42 | trainer.total_training_steps=1 $@ 43 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=False \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 19 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | 
actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 25 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 26 | critic.optim.lr=1e-5 \ 27 | critic.model.use_remove_padding=False \ 28 | critic.model.path=Qwen/Qwen2.5-0.5B \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size=32 \ 31 | critic.model.fsdp_config.param_offload=False \ 32 | critic.model.fsdp_config.grad_offload=False \ 33 | critic.model.fsdp_config.optimizer_offload=False \ 34 | algorithm.kl_ctrl.kl_coef=0.001 \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console'] \ 37 | +trainer.val_before_train=False \ 38 | trainer.project_name='verl_example_gsm8k' \ 39 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 40 | trainer.n_gpus_per_node=8 \ 41 | trainer.nnodes=1 \ 42 | trainer.save_freq=-1 \ 43 | trainer.total_training_steps=1 $@ 44 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | data.return_raw_chat=True \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 19 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 20 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 26 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 27 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 28 | critic.optim.lr=1e-5 \ 29 | critic.model.use_remove_padding=True \ 30 | critic.optim.lr_warmup_steps_ratio=0.05 \ 31 | critic.model.path=Qwen/Qwen2.5-0.5B \ 32 | critic.model.enable_gradient_checkpointing=False \ 33 | critic.ppo_micro_batch_size=32 \ 34 | critic.model.fsdp_config.param_offload=False \ 35 | critic.model.fsdp_config.grad_offload=False \ 36 | critic.model.fsdp_config.optimizer_offload=False \ 37 | reward_model.enable=True \ 38 | reward_model.model.path=Qwen/Qwen2.5-0.5B\ 39 | reward_model.model.use_remove_padding=True \ 40 | reward_model.model.fsdp_config.param_offload=True \ 41 | reward_model.micro_batch_size=16 \ 42 | algorithm.kl_ctrl.kl_coef=0.001 \ 43 | trainer.critic_warmup=0 \ 44 | trainer.logger=['console'] \ 45 | +trainer.val_before_train=False \ 46 | trainer.project_name='verl_example' \ 47 | trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm' \ 48 | trainer.n_gpus_per_node=8 \ 49 | trainer.nnodes=1 \ 50 | trainer.save_freq=-1 \ 51 | trainer.total_training_steps=1 $@ 52 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | data.return_raw_chat=True \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=False \ 16 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 19 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 20 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 26 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 27 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 28 | critic.optim.lr=1e-5 \ 29 | critic.model.use_remove_padding=False \ 30 | critic.optim.lr_warmup_steps_ratio=0.05 \ 31 | critic.model.path=Qwen/Qwen2.5-0.5B \ 32 | critic.model.enable_gradient_checkpointing=False \ 33 | critic.ppo_micro_batch_size=32 \ 34 | critic.model.fsdp_config.param_offload=False \ 35 | critic.model.fsdp_config.grad_offload=False \ 36 | critic.model.fsdp_config.optimizer_offload=False \ 37 | reward_model.enable=True \ 38 | reward_model.model.path=Qwen/Qwen2.5-0.5B\ 39 | reward_model.model.use_remove_padding=False \ 40 | reward_model.model.fsdp_config.param_offload=True \ 41 | reward_model.micro_batch_size=16 \ 42 | algorithm.kl_ctrl.kl_coef=0.001 \ 43 | trainer.critic_warmup=0 \ 44 | +trainer.val_before_train=False \ 45 | trainer.logger=['console'] \ 46 | trainer.project_name='verl_example' \ 47 | trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm' \ 48 | trainer.n_gpus_per_node=8 \ 49 | trainer.nnodes=1 \ 50 | trainer.save_freq=-1 \ 51 | trainer.total_training_steps=1 $@ 52 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | data.return_raw_chat=True \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 19 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 20 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=12000 \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 23 | 
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 28 | actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=12000 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=12000 \ 31 | critic.optim.lr=1e-5 \ 32 | critic.model.use_remove_padding=True \ 33 | critic.optim.lr_warmup_steps_ratio=0.05 \ 34 | critic.model.path=Qwen/Qwen2.5-0.5B \ 35 | critic.model.enable_gradient_checkpointing=False \ 36 | critic.ppo_micro_batch_size=32 \ 37 | critic.use_dynamic_bsz=True \ 38 | critic.ppo_max_token_len_per_gpu=98304 \ 39 | critic.model.fsdp_config.param_offload=False \ 40 | critic.model.fsdp_config.grad_offload=False \ 41 | critic.model.fsdp_config.optimizer_offload=False \ 42 | reward_model.enable=True \ 43 | reward_model.model.path=Qwen/Qwen2.5-0.5B\ 44 | reward_model.model.use_remove_padding=True \ 45 | reward_model.model.fsdp_config.param_offload=True \ 46 | reward_model.micro_batch_size=16 \ 47 | reward_model.use_dynamic_bsz=True \ 48 | reward_model.forward_max_token_len_per_gpu=98304 \ 49 | algorithm.kl_ctrl.kl_coef=0.001 \ 50 | trainer.critic_warmup=0 \ 51 | trainer.logger=['console'] \ 52 | +trainer.val_before_train=False \ 53 | trainer.project_name='verl_example' \ 54 | trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm_seq_balance' \ 55 | trainer.n_gpus_per_node=8 \ 56 | trainer.nnodes=1 \ 57 | trainer.save_freq=-1 \ 58 | trainer.total_training_steps=1 $@ 59 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2 with flash_attn has some issues 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | data.return_raw_chat=True \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size=32 \ 19 | actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \ 20 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 23 | actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \ 24 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 25 | actor_rollout_ref.rollout.name=vllm \ 26 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 27 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 28 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 29 | critic.optim.lr=1e-5 \ 30 | critic.ulysses_sequence_parallel_size=2 \ 31 | critic.model.use_remove_padding=True \ 32 | critic.optim.lr_warmup_steps_ratio=0.05 \ 33 | critic.model.path=Qwen/Qwen2.5-0.5B \ 34 | critic.model.enable_gradient_checkpointing=False \ 35 | critic.ppo_micro_batch_size=32 \ 
36 | critic.model.fsdp_config.param_offload=False \ 37 | critic.model.fsdp_config.grad_offload=False \ 38 | critic.model.fsdp_config.optimizer_offload=False \ 39 | reward_model.enable=True \ 40 | reward_model.ulysses_sequence_parallel_size=2 \ 41 | reward_model.model.path=Qwen/Qwen2.5-0.5B\ 42 | reward_model.model.use_remove_padding=True \ 43 | reward_model.model.fsdp_config.param_offload=True \ 44 | reward_model.micro_batch_size=16 \ 45 | algorithm.kl_ctrl.kl_coef=0.001 \ 46 | trainer.critic_warmup=0 \ 47 | +trainer.val_before_train=False \ 48 | trainer.logger=['console'] \ 49 | trainer.project_name='verl_example' \ 50 | trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm_sp2' \ 51 | trainer.n_gpus_per_node=8 \ 52 | trainer.nnodes=1 \ 53 | trainer.save_freq=-1 \ 54 | trainer.total_training_steps=1 $@ 55 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 12 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 13 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 14 | critic.model.path=tests/e2e/arithmetic_sequence/model | tee $OUTPUT_FILE; 15 | 16 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE 17 | rm -rf $OUTPUT_FILE 18 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 6 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 7 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 8 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 9 | actor_rollout_ref.rollout.name=vllm \ 10 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 11 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 12 | critic.model.path=Qwen/Qwen2.5-0.5B \ 13 | critic.model.use_remove_padding=True \ 14 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def test_flash_attn_cross_entropy(): 17 | from verl.utils.torch_functional import logprobs_from_logits_naive 18 | 19 | from verl.utils.debug import log_gpu_memory_usage 20 | 21 | from flash_attn.ops.triton.cross_entropy import cross_entropy_loss 22 | 23 | import torch 24 | from torch import nn 25 | 26 | log_gpu_memory_usage('At start') 27 | 28 | hidden_states = torch.randn(size=(2048, 5120), device='cuda', requires_grad=True, dtype=torch.bfloat16) 29 | 30 | linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device='cuda', dtype=torch.bfloat16) 31 | 32 | logits = linear(hidden_states) 33 | 34 | # logits = logits.float() 35 | labels = torch.randint(low=0, high=155136, size=(2048,), device='cuda') 36 | 37 | log_gpu_memory_usage('before computation') 38 | # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True) 39 | output = -cross_entropy_loss(logits, labels)[0] 40 | # output = logprobs_from_logits(logits, labels) 41 | log_gpu_memory_usage('After forward') 42 | output.sum().backward() 43 | log_gpu_memory_usage('After backward') 44 | 45 | groundtruth = logprobs_from_logits_naive(logits.float(), labels) 46 | 47 | torch.testing.assert_close(output, groundtruth) 48 | -------------------------------------------------------------------------------- /tests/ray/check_worker_alive/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import time 16 | import sys 17 | import os 18 | 19 | import ray 20 | 21 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup 22 | from verl.single_controller.base.worker import Worker 23 | from verl.single_controller.base.decorator import register, Dispatch 24 | 25 | 26 | @ray.remote 27 | class TestActor(Worker): 28 | 29 | def __init__(self) -> None: 30 | super().__init__() 31 | 32 | @register(dispatch_mode=Dispatch.ONE_TO_ALL, blocking=False) 33 | def foo(self, wait_time): 34 | time.sleep(wait_time) 35 | sys.exit(1) 36 | 37 | 38 | if __name__ == "__main__": 39 | wait_time = int(os.getenv("WAIT_TIME", "10")) 40 | 41 | ray.init() 42 | 43 | # test single-node-no-partition 44 | print(f"test single-node-no-partition") 45 | resource_pool = RayResourcePool([2], use_gpu=True) 46 | class_with_args = RayClassWithInitArgs(cls=TestActor) 47 | 48 | print("create worker group") 49 | wg = RayWorkerGroup(resource_pool, class_with_args, name_prefix="test") 50 | 51 | wg.start_worker_aliveness_check(1) 52 | time.sleep(1) 53 | 54 | print(time.time(), "start foo") 55 | 56 | _ = wg.foo(wait_time) 57 | print("foo started") 58 | 59 | print(time.time(), 60 | f"wait 6x wait time {wait_time*6} to let signal returned to process but still not exceed process wait time") 61 | time.sleep(wait_time * 6) 62 | 63 | ray.shutdown() 64 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | In client, we can get the server handler and send RPC request 16 | """ 17 | 18 | import ray 19 | import torch 20 | 21 | from verl import DataProto 22 | from verl.single_controller.ray import RayClassWithInitArgs 23 | from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup 24 | 25 | from tensordict import TensorDict 26 | 27 | from server import Trainer 28 | 29 | 30 | def compute_position_id_with_mask(mask): 31 | return torch.clip(torch.cumsum(mask, dim=-1) - 1, min=0, max=None) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | ray.init(address='auto', namespace='verl') 37 | # get the worker group using names 38 | worker_names = ['trainerTrainer_0:0', 'trainerTrainer_0:1'] 39 | cls_with_init_args = RayClassWithInitArgs(cls=Trainer) 40 | worker_group = NVMegatronRayWorkerGroup.from_detached(worker_names=worker_names, 41 | ray_cls_with_init=cls_with_init_args) 42 | 43 | batch_size = 16 44 | sequence_length = 1024 45 | 46 | # give Trainer some data to train 47 | input_ids = torch.randint(low=0, high=256, size=(batch_size, sequence_length), dtype=torch.int64, device='cuda') 48 | attention_mask = torch.ones_like(input_ids) 49 | position_ids = compute_position_id_with_mask(attention_mask) 50 | 51 | data = DataProto(batch=TensorDict( 52 | { 53 | 'input_ids': input_ids, 54 | 'attention_mask': attention_mask, 55 | 'position_ids': position_ids 56 | }, batch_size=batch_size), 57 | meta_info={}) 58 | 59 | output = worker_group.train_model(data) 60 | 61 | print(output) 62 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import time 16 | import os 17 | import subprocess 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time") 38 | time.sleep(wait_time * 1.5) 39 | print(time.time(), f"start checking") 40 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 41 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 42 | print(f"test passed") 43 | 44 | 45 | if __name__ == "__main__": 46 | test() 47 | -------------------------------------------------------------------------------- /tests/ray/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import ray 17 | import torch 18 | from verl import DataProto 19 | from tensordict import TensorDict 20 | 21 | from verl.single_controller.base.worker import Worker 22 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs 23 | from verl.single_controller.ray import RayWorkerGroup 24 | 25 | os.environ['RAY_DEDUP_LOGS'] = '0' 26 | os.environ['NCCL_DEBUG'] = 'WARN' 27 | 28 | 29 | @ray.remote 30 | class ModelActor(Worker): 31 | 32 | def __init__(self): 33 | pass 34 | 35 | 36 | class HackSelf(): 37 | 38 | def __init__(self): 39 | pass 40 | 41 | 42 | def get_aux_metrics(self, test_proto): 43 | sequence_ids = test_proto.batch["sequence_ids"] 44 | decode_count = [] 45 | for i in range(sequence_ids.size(0)): 46 | decode_count.append(len(sequence_ids[i].tolist())) 47 | ret_proto = DataProto(batch=TensorDict({ 48 | "sequence_ids": sequence_ids, 49 | "decode_count": torch.tensor(decode_count) 50 | }, 51 | batch_size=sequence_ids.size(0))) 52 | return ret_proto 53 | 54 | 55 | def test(): 56 | # construct model 57 | ray.init() 58 | 59 | # create 2 workers, each hold a GPU 60 | resource_pool = RayResourcePool([2], use_gpu=True, name_prefix='a') 61 | 62 | class_with_args = RayClassWithInitArgs(cls=ModelActor) 63 | shard_wg = RayWorkerGroup(resource_pool, class_with_args) 64 | 65 | test_bs = 8 66 | test_proto = DataProto(TensorDict({ 67 | "sequence_ids": torch.ones([test_bs, 2048], dtype=torch.int64), 68 | }, 69 | batch_size=test_bs), 70 | meta_info={"query_length": 1536}) 71 | 72 | # Sharding among different ranks 73 | ret_proto1 = shard_wg.execute_with_func_generator(get_aux_metrics, test_proto) 74 | 75 | # compare execute on driver 76 | hs = HackSelf() 77 | ret_proto2 = get_aux_metrics(hs, test_proto) 78 | 79 | torch.testing.assert_close(ret_proto1.batch["decode_count"], ret_proto2.batch["decode_count"]) 80 | 81 | ray.shutdown() 82 | -------------------------------------------------------------------------------- /tests/ray/test_ray_local_envs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | e2e test verl.single_controller.ray 16 | """ 17 | import os 18 | import ray 19 | 20 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup 21 | from verl.single_controller.base.worker import Worker 22 | from verl.single_controller.base.decorator import register, Dispatch, collect_all_to_all, Execute 23 | 24 | 25 | @ray.remote 26 | class TestActor(Worker): 27 | 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | def getenv(self, key): 32 | val = os.getenv(key, f"{key} not set") 33 | return val 34 | 35 | 36 | def test_basics(): 37 | ray.init() 38 | 39 | # create 4 workers, each hold a GPU 40 | resource_pool = RayResourcePool([4], use_gpu=True) 41 | class_with_args = RayClassWithInitArgs(cls=TestActor) 42 | 43 | worker_group = RayWorkerGroup(resource_pool=resource_pool, 44 | ray_cls_with_init=class_with_args, 45 | name_prefix="worker_group_basic") 46 | 47 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE") 48 | assert output == ["4", "4", "4", "4"] 49 | 50 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK") 51 | assert set(output) == set(["0", "1", "2", "3"]) 52 | 53 | ray.shutdown() 54 | 55 | 56 | if __name__ == '__main__': 57 | test_basics() 58 | -------------------------------------------------------------------------------- /tests/ray/test_rvdz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class TestWorker: 20 | 21 | def __init__(self, rank, world_size, group_name): 22 | self.rank = rank 23 | self.world_size = world_size 24 | self.group_name = group_name 25 | self.communicator = None 26 | 27 | def init(self): 28 | from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray 29 | self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name) 30 | 31 | def test(self): 32 | if self.communicator is None: 33 | return None 34 | return self.communicator.rank_id() 35 | 36 | 37 | def test_rvdz(): 38 | ray.init() 39 | 40 | group_name = "test_group" 41 | world_size = 2 42 | 43 | workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)] 44 | 45 | ray.get([worker.init.remote() for worker in workers]) 46 | 47 | ranks = ray.get([worker.test.remote() for worker in workers]) 48 | 49 | assert ranks == [0, 1], f"expecting [0, 1], got {ranks}" 50 | 51 | ray.shutdown() 52 | -------------------------------------------------------------------------------- /tests/sanity/check_license.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | license_head = "Copyright 2024 Bytedance Ltd. and/or its affiliates" 16 | 17 | from pathlib import Path 18 | from argparse import ArgumentParser 19 | 20 | if __name__ == '__main__': 21 | parser = ArgumentParser() 22 | parser.add_argument('--directory', '-d', required=True, type=str) 23 | args = parser.parse_args() 24 | directory_in_str = args.directory 25 | 26 | pathlist = Path(directory_in_str).glob('**/*.py') 27 | for path in pathlist: 28 | # because path is object not string 29 | path_in_str = str(path.absolute()) 30 | with open(path_in_str, 'r') as f: 31 | file_content = f.read() 32 | 33 | assert license_head in file_content, f'file {path_in_str} does not contain license' 34 | 35 | print(path_in_str) 36 | -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rl_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | import torch 16 | from torch.utils.data import DataLoader 17 | from transformers import AutoTokenizer 18 | 19 | 20 | def get_gsm8k_data(): 21 | # prepare test dataset 22 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/gsm8k/train.parquet" 23 | local_folder = os.path.expanduser('~/verl-data/gsm8k/') 24 | local_path = os.path.join(local_folder, 'train.parquet') 25 | os.makedirs(local_folder, exist_ok=True) 26 | return local_path 27 | 28 | 29 | def test_rl_dataset(): 30 | from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn 31 | from verl.utils import hf_tokenizer 32 | tokenizer = hf_tokenizer('deepseek-ai/deepseek-coder-1.3b-instruct') 33 | local_path = get_gsm8k_data() 34 | dataset = RLHFDataset(parquet_files=local_path, tokenizer=tokenizer, prompt_key='prompt', max_prompt_length=256) 35 | 36 | dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, drop_last=True, collate_fn=collate_fn) 37 | 38 | a = next(iter(dataloader)) 39 | 40 | from verl import DataProto 41 | 42 | tensors = {} 43 | non_tensors = {} 44 | 45 | for key, val in a.items(): 46 | if isinstance(val, torch.Tensor): 47 | tensors[key] = val 48 | else: 49 | non_tensors[key] = val 50 | 51 | data_proto = DataProto.from_dict(tensors=tensors, non_tensors=non_tensors) 52 | 53 | data = dataset[0]['input_ids'] 54 | output = tokenizer.batch_decode([data])[0] 55 | print(f'type: {type(output)}') 56 | print(f'\n\noutput: {output}') 57 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from transformers import AutoTokenizer 17 | from verl.utils import hf_tokenizer 18 | from verl.utils.dataset.rm_dataset import RMDataset 19 | 20 | 21 | def get_rm_data(): 22 | # prepare test dataset 23 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet" 24 | local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/') 25 | local_path = os.path.join(local_folder, 'test.parquet') 26 | os.makedirs(local_folder, exist_ok=True) 27 | return local_path 28 | 29 | 30 | def test_rm_dataset(): 31 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 32 | local_path = get_rm_data() 33 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 34 | data = dataset[0]['input_ids'] 35 | output = tokenizer.batch_decode(data) 36 | assert len(output) > 1 37 | assert type(output[0]) == str 38 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_sft_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd.
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from transformers import AutoTokenizer 17 | from verl.utils import hf_tokenizer 18 | from verl.utils.dataset.sft_dataset import SFTDataset 19 | 20 | 21 | def get_gsm8k_data(): 22 | # prepare test dataset 23 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/gsm8k/train.parquet" 24 | local_folder = os.path.expanduser('~/verl-data/gsm8k/') 25 | local_path = os.path.join(local_folder, 'train.parquet') 26 | return local_path 27 | 28 | 29 | def test_sft_cot_dataset(): 30 | tokenizer = hf_tokenizer('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct') 31 | local_path = get_gsm8k_data() 32 | dataset = SFTDataset(parquet_files=local_path, 33 | tokenizer=tokenizer, 34 | prompt_key='prompt', 35 | prompt_dict_keys=['content'], 36 | response_key='extra_info', 37 | response_dict_keys=['answer'], 38 | max_length=512) 39 | 40 | data = dataset[0]['input_ids'] 41 | output = tokenizer.batch_decode([data])[0] 42 | assert len(output) > 1 43 | assert type(output) == str 44 | 45 | 46 | def test_sft_dataset(): 47 | tokenizer = hf_tokenizer('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct') 48 | local_path = get_gsm8k_data() 49 | dataset = SFTDataset(parquet_files=local_path, 50 | tokenizer=tokenizer, 51 | prompt_key='extra_info', 52 | prompt_dict_keys=['question'], 53 | response_key='extra_info', 54 | response_dict_keys=['answer'], 55 | max_length=512) 56 | 57 | data = dataset[0]['input_ids'] 58 | output = tokenizer.batch_decode([data])[0] 59 | assert len(output) > 1 60 | assert type(output) == str -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common model zoos such as huggingface/transformers struggle when used with PyTorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized model implementation with packed inputs in verl. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask. 15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Follow the existing infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native PyTorch is NOT auto-parallelism. The way it works is to specify, via configs, how model parameters and inputs/outputs are resharded. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in PyTorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd.
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | import torch 17 | from megatron.core import ModelParallelConfig 18 | from torch import nn 19 | from transformers import LlamaConfig 20 | 21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 22 | from verl.utils.megatron import sequence_parallel as sp_utils 23 | 24 | 25 | class ParallelLlamaRMSNorm(nn.Module): 26 | 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine(input=hidden_states, 43 | weight=self.weight, 44 | normalized_shape=self.normalized_shape, 45 | eps=self.variance_epsilon, 46 | memory_efficient=True) -------------------------------------------------------------------------------- /verl/models/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import importlib 16 | from typing import List, Optional, Type 17 | 18 | import torch.nn as nn 19 | 20 | # Supported models using HF Rmpad 21 | # TODO(sgm): HF may supported more than listed here, we should add more after testing 22 | from transformers import LlamaConfig, MistralConfig, GemmaConfig, Qwen2Config 23 | 24 | _REOVEPAD_MODELS = {'llama': LlamaConfig, 'mistral': MistralConfig, 'gemma': GemmaConfig, 'qwen2': Qwen2Config} 25 | 26 | 27 | def check_model_support_rmpad(model_type: str): 28 | assert isinstance(model_type, str) 29 | if not model_type in _REOVEPAD_MODELS.keys(): 30 | raise ValueError(f"Model architecture {model_type} is not supported for now. " 31 | f"RMPad supported architectures: {_REOVEPAD_MODELS.keys()}." 32 | f"Please set `use_remove_padding=False` in the model config.") 33 | 34 | 35 | # Supported models in Megatron-LM 36 | # Architecture -> (module, class). 37 | _MODELS = { 38 | "LlamaForCausalLM": 39 | ("llama", ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad")), 40 | "MistralForCausalLM": ("mistral", ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", 41 | "ParallelMistralForCausalLMRmPad")) 42 | } 43 | 44 | 45 | # return model class 46 | class ModelRegistry: 47 | 48 | @staticmethod 49 | def load_model_cls(model_arch: str, value=False) -> Optional[Type[nn.Module]]: 50 | if model_arch not in _MODELS: 51 | return None 52 | 53 | megatron = "megatron" 54 | 55 | module_name, model_cls_name = _MODELS[model_arch] 56 | if not value: # actor/ref 57 | model_cls_name = model_cls_name[0] 58 | elif value: # critic/rm 59 | model_cls_name = model_cls_name[1] 60 | 61 | module = importlib.import_module(f"verl.models.{module_name}.{megatron}.modeling_{module_name}_megatron") 62 | return getattr(module, model_cls_name, None) 63 | 64 | @staticmethod 65 | def get_supported_archs() -> List[str]: 66 | return list(_MODELS.keys()) 67 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def get_weight_loader(arch: str): 17 | from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama 18 | _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {'LlamaForCausalLM': load_state_dict_to_megatron_llama} 19 | 20 | if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY: 21 | return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch] 22 | raise ValueError(f"Model architectures {arch} are not supported for now. " 23 | f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}") 24 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | from dataclasses import dataclass 17 | from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo 18 | 19 | 20 | class MegatronWorker(Worker): 21 | 22 | def __init__(self, cuda_visible_devices=None) -> None: 23 | super().__init__(cuda_visible_devices) 24 | 25 | def get_megatron_global_info(self): 26 | from megatron.core import parallel_state as mpu 27 | tp_size = mpu.get_tensor_model_parallel_world_size() 28 | dp_size = mpu.get_data_parallel_world_size() 29 | pp_size = mpu.get_pipeline_model_parallel_world_size() 30 | info = DistGlobalInfo(tp_size=tp_size, dp_size=dp_size, pp_size=pp_size) 31 | return info 32 | 33 | def get_megatron_rank_info(self): 34 | from megatron.core import parallel_state as mpu 35 | tp_rank = mpu.get_tensor_model_parallel_rank() 36 | dp_rank = mpu.get_data_parallel_rank() 37 | pp_rank = mpu.get_pipeline_model_parallel_rank() 38 | info = DistRankInfo(tp_rank=tp_rank, dp_rank=dp_rank, pp_rank=pp_rank) 39 | return info -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import Dict 16 | 17 | from .worker import DistRankInfo, DistGlobalInfo 18 | from verl.single_controller.base import ResourcePool, WorkerGroup 19 | 20 | 21 | class MegatronWorkerGroup(WorkerGroup): 22 | 23 | def __init__(self, resource_pool: ResourcePool, **kwargs): 24 | super().__init__(resource_pool=resource_pool, **kwargs) 25 | self._megatron_rank_info = None 26 | self._megatron_global_info: DistGlobalInfo = None 27 | 28 | def init_megatron(self, default_megatron_kwargs: Dict = None): 29 | raise NotImplementedError(f"MegatronWorkerGroup.init_megatron should be overwritten") 30 | 31 | def get_megatron_rank_info(self, rank: int) -> DistRankInfo: 32 | assert 0 <= rank < self.world_size, f'rank must be from [0, world_size), Got {rank}' 33 | return self._megatron_rank_info[rank] 34 | 35 | @property 36 | def tp_size(self): 37 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 38 | return self._megatron_global_info.tp_size 39 | 40 | @property 41 | def dp_size(self): 42 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 43 | return self._megatron_global_info.dp_size 44 | 45 | @property 46 | def pp_size(self): 47 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 48 | return self._megatron_global_info.pp_size 49 | 50 | def get_megatron_global_info(self): 51 | return self._megatron_global_info 52 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls 16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo) -------------------------------------------------------------------------------- /verl/single_controller/version/version: -------------------------------------------------------------------------------- 1 | 0.0.2 -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from importlib.metadata import version, PackageNotFoundError 16 | 17 | 18 | def get_version(pkg): 19 | try: 20 | return version(pkg) 21 | except PackageNotFoundError: 22 | return None 23 | 24 | 25 | package_name = 'vllm' 26 | package_version = get_version(package_name) 27 | 28 | if package_version == '0.3.1': 29 | vllm_version = '0.3.1' 30 | from .vllm_v_0_3_1.llm import LLM 31 | from .vllm_v_0_3_1.llm import LLMEngine 32 | from .vllm_v_0_3_1 import parallel_state 33 | elif package_version == '0.4.2': 34 | vllm_version = '0.4.2' 35 | from .vllm_v_0_4_2.llm import LLM 36 | from .vllm_v_0_4_2.llm import LLMEngine 37 | from .vllm_v_0_4_2 import parallel_state 38 | elif package_version == '0.5.4': 39 | vllm_version = '0.5.4' 40 | from .vllm_v_0_5_4.llm import LLM 41 | from .vllm_v_0_5_4.llm import LLMEngine 42 | from .vllm_v_0_5_4 import parallel_state 43 | elif package_version == '0.6.3': 44 | vllm_version = '0.6.3' 45 | from .vllm_v_0_6_3.llm import LLM 46 | from .vllm_v_0_6_3.llm import LLMEngine 47 | from .vllm_v_0_6_3 import parallel_state 48 | else: 49 | raise ValueError( 50 | f'vllm version {package_version} not supported. Currently supported versions are 0.3.1, 0.4.2, 0.5.4 and 0.6.3.' 51 | ) 52 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models 15 | 16 | from typing import Dict, Union, Optional, Iterable, Tuple 17 | 18 | import torch 19 | import torch.nn as nn 20 | 21 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 22 | from vllm.model_executor.model_loader.weight_utils import default_weight_loader 23 | 24 | 25 | def update_hf_weight_loader(): 26 | print('no hf weight loader need to be updated') 27 | return 28 | 29 | 30 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 31 | assert isinstance(actor_weights, Dict) 32 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 33 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys(): 34 | del actor_weights["lm_head.weight"] 35 | vllm_model.load_weights(actor_weights.items()) 36 | for _, module in vllm_model.named_modules(): 37 | quant_method = getattr(module, "quant_method", None) 38 | if quant_method is not None: 39 | quant_method.process_weights_after_loading(module) 40 | # FIXME: Remove this after Mixtral is updated 41 | # to use quant_method. 42 | if hasattr(module, "process_weights_after_loading"): 43 | module.process_weights_after_loading() 44 | vllm_model = vllm_model.cuda() 45 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys(): 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import Optional 17 | 18 | from transformers import PreTrainedTokenizer 19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup 20 | from vllm.utils import LRUCache 21 | 22 | 23 | class TokenizerGroup(TokenizerGroup): 24 | """A group of tokenizers that can be used for LoRA adapters.""" 25 | 26 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, 27 | max_input_length: Optional[int]): 28 | self.enable_lora = enable_lora 29 | self.max_input_length = max_input_length 30 | self.tokenizer = tokenizer 31 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 32 | 33 | # FIXME(sgm): for simplicity, we assign the special token here 34 | @property 35 | def pad_token_id(self): 36 | return self.tokenizer.pad_token_id 37 | 38 | @property 39 | def eos_token_id(self): 40 | return self.tokenizer.eos_token_id 41 | -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | micro_batch_size: 256 27 | enforce_eager: True 28 | free_cache_engine: True 29 | load_format: dummy_dtensor 30 | tensor_model_parallel_size: 1 31 | max_num_batched_tokens: 8192 32 | max_num_seqs: 1024 33 | log_prob_micro_batch_size: 8 34 | # for hf rollout 35 | do_sample: True -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: 16 # this is also val batch size 4 | train_files: ~/data/gsm8k/train.parquet 5 | val_files: ~/data/gsm8k/test.parquet 6 | prompt_key: question 7 | response_key: answer 8 | max_length: 1024 9 | truncation: error 10 | balance_dp_token: False 11 | chat_template: null 12 | model: 13 | partial_pretrain: ~/models/gemma-1.1-7b-it 14 | fsdp_config: 15 | wrap_policy: 16 | min_num_params: 0 17 | cpu_offload: False 18 | offload_params: False 19 | external_lib: null 20 | enable_gradient_checkpointing: False 21 | trust_remote_code: False 22 | optim: 23 | lr: 1e-5 24 | betas: [0.9, 0.95] 25 | weight_decay: 0.01 26 | warmup_steps_ratio: 0.1 27 | clip_grad: 1.0 28 | 29 | trainer: 30 | default_local_dir: /tmp/sft_model 31 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 32 | resume_path: null 33 | project_name: gsm8k-sft 34 | experiment_name: test 35 | total_epochs: 4 36 | logger: ['console'] 37 | seed: 1 38 | 39 | -------------------------------------------------------------------------------- /verl/trainer/main_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Offline evaluate the performance of a generated file using reward model and ground truth verifier. 16 | The input is a parquet file that contains N generated sequences and (optional) the ground truth. 17 | 18 | """ 19 | 20 | import hydra 21 | from verl.utils.fs import copy_local_path_from_hdfs 22 | from verl.utils.reward_score import math, gsm8k, kk 23 | import pandas as pd 24 | import numpy as np 25 | 26 | 27 | def select_reward_fn(data_source): 28 | if data_source == 'lighteval/MATH': 29 | return math.compute_score 30 | if 'kk' in data_source: 31 | return kk.compute_score 32 | else: 33 | raise NotImplementedError 34 | 35 | 36 | @hydra.main(config_path='config', config_name='evaluation', version_base=None) 37 | def main(config): 38 | local_path = copy_local_path_from_hdfs(config.data.path) 39 | dataset = pd.read_parquet(local_path) 40 | prompts = dataset[config.data.prompt_key] 41 | responses = dataset[config.data.response_key] 42 | data_sources = dataset[config.data.data_source_key] 43 | reward_model_data = dataset[config.data.reward_model_key] 44 | 45 | passes = 0 46 | 47 | total = len(dataset) 48 | 49 | for i in range(total): 50 | response_lst = responses[i] 51 | data_source = data_sources[i] 52 | # select reward score based on data_source 53 | prompt = prompts[i] 54 | reward_data = reward_model_data[i] 55 | reward_fn = select_reward_fn(data_source) 56 | ground_truth = reward_data['ground_truth'] 57 | score_lst = [] 58 | for r in response_lst: 59 | score = reward_fn(r, ground_truth) 60 | score_lst.append(score) 61 | 62 | max_score = np.max(score_lst) 63 | 64 | if max_score == 3: 65 | passes += 1 66 | 67 | print(f'pass@5: {passes / total}') 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import * 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
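# Self-contained sketch for verl/utils/config.py above: update_dict_with_config copies
# same-named fields from an OmegaConf config into a plain dict. The keys and values below
# are illustrative.
from omegaconf import OmegaConf

from verl.utils.config import update_dict_with_config

defaults = {'lr': 1e-5, 'clip_grad': 1.0}
overrides = OmegaConf.create({'lr': 3e-6, 'clip_grad': 0.5})
update_dict_with_config(defaults, overrides)
assert defaults == {'lr': 3e-6, 'clip_grad': 0.5}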
14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /verl/utils/import_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Utilities to check if packages are available. 16 | We assume package availability won't change during runtime. 17 | """ 18 | 19 | from functools import cache 20 | from typing import List 21 | 22 | 23 | @cache 24 | def is_megatron_core_available(): 25 | try: 26 | from megatron.core import parallel_state as mpu 27 | return True 28 | except ImportError: 29 | return False 30 | 31 | 32 | @cache 33 | def is_vllm_available(): 34 | try: 35 | import vllm 36 | return True 37 | except ImportError: 38 | return False 39 | 40 | 41 | def import_external_libs(external_libs=None): 42 | if external_libs is None: 43 | return 44 | if not isinstance(external_libs, List): 45 | external_libs = [external_libs] 46 | import importlib 47 | for external_lib in external_libs: 48 | importlib.import_module(external_lib) 49 | -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | A Ray logger will receive logging info from different processes. 16 | """ 17 | import numbers 18 | from typing import Dict 19 | 20 | 21 | def concat_dict_to_str(dict: Dict, step): 22 | output = [f'step:{step}'] 23 | for k, v in dict.items(): 24 | if isinstance(v, numbers.Number): 25 | output.append(f'{k}:{v:.3f}') 26 | output_str = ' - '.join(output) 27 | return output_str 28 | 29 | 30 | class LocalLogger: 31 | 32 | def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False): 33 | self.print_to_console = print_to_console 34 | if print_to_console: 35 | print('Using LocalLogger is deprecated. The constructor API will change ') 36 | 37 | def flush(self): 38 | pass 39 | 40 | def log(self, data, step): 41 | if self.print_to_console: 42 | print(concat_dict_to_str(data, step=step), flush=True) -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | 17 | 18 | def set_basic_config(level): 19 | """ 20 | This function sets the global logging format and level. It will be called when import verl 21 | """ 22 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 23 | -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
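# Quick sketch for verl/utils/logger/aggregate_logger.py above: concat_dict_to_str renders
# numeric metrics with three decimals, and LocalLogger(print_to_console=True) prints that
# string on every log() call (the constructor also emits its deprecation notice). Metric
# names and values are illustrative.
from verl.utils.logger.aggregate_logger import LocalLogger, concat_dict_to_str

line = concat_dict_to_str({'actor/loss': 0.51234, 'critic/score': 1.0}, step=10)
assert line == 'step:10 - actor/loss:0.512 - critic/score:1.000'

logger = LocalLogger(print_to_console=True)
logger.log(data={'actor/loss': 0.51234}, step=10)  # prints: step:10 - actor/loss:0.512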
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | 17 | 18 | class MemoryBuffer: 19 | 20 | def __init__(self, numel, numel_padded, dtype): 21 | self.numel = numel 22 | self.numel_padded = numel_padded 23 | self.dtype = dtype 24 | self.data = torch.zeros(self.numel_padded, 25 | dtype=self.dtype, 26 | device=torch.cuda.current_device(), 27 | requires_grad=False) 28 | 29 | def zero(self): 30 | """Reset the buffer to zero.""" 31 | self.data.zero_() 32 | 33 | def get(self, shape, start_index): 34 | """Return a tensor with the input `shape` as a view into the 35 | 1-D data starting at `start_index`.""" 36 | end_index = start_index + shape.numel() 37 | assert end_index <= self.numel, \ 38 | 'requested tensor is out of the buffer range.' 39 | buffer_tensor = self.data[start_index:end_index] 40 | buffer_tensor = buffer_tensor.view(shape) 41 | return buffer_tensor 42 | -------------------------------------------------------------------------------- /verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import torch 17 | from megatron.core import parallel_state as mpu 18 | 19 | from .sequence_parallel import pad_to_sequence_parallel 20 | 21 | 22 | def compute_transformers_input_shapes(batches, meta_info): 23 | from flash_attn.bert_padding import unpad_input # flash 2 is a must for Megatron 24 | # pre-compute input shapes for each micro-batch at each pp stage 25 | input_shapes = [] 26 | for model_inputs in batches: 27 | input_ids = model_inputs['input_ids'] 28 | attention_mask = model_inputs['attention_mask'] 29 | input_ids_rmpad = unpad_input(input_ids.unsqueeze(dim=-1), attention_mask)[0] # (total_nnz, 1) 30 | if meta_info['sequence_parallel']: 31 | input_ids_rmpad = pad_to_sequence_parallel(input_ids_rmpad) 32 | # compute shapes for model_inputs 33 | input_shapes.append( 34 | torch.Size([ 35 | input_ids_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size(), 1, meta_info['hidden_size'] 36 | ])) 37 | else: 38 | # compute shapes for model_inputs 39 | input_shapes.append(torch.Size([input_ids_rmpad.shape[0], 1, meta_info['hidden_size']])) 40 | return input_shapes 41 | 42 | 43 | def make_batch_generator(batches, vpp_size): 44 | if vpp_size > 1: 45 | # has vpp 46 | batch_generator = [batches] * vpp_size # number of vpp chunks 47 | batch_generator = [iter(b) for b in batch_generator] 48 | else: 49 | # no vpp 50 | batch_generator = iter(batches) 51 | return batch_generator 52 | -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from megatron.core import parallel_state as mpu 19 | 20 | 21 | def mark_parameter_as_sequence_parallel(parameter): 22 | setattr(parameter, 'sequence_parallel', True) 23 | 24 | 25 | def is_sequence_parallel_param(param): 26 | return hasattr(param, 'sequence_parallel') and param.sequence_parallel 27 | 28 | 29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor): 30 | """pad the tokens such that the total length is a multiple of sp world size 31 | 32 | Args: 33 | unpad_tokens: (total_nnz, ...). 
Tokens after removing padding 34 | 35 | Returns: 36 | 37 | """ 38 | total_nnz = unpad_tokens.shape[0] 39 | sp_world_size = mpu.get_tensor_model_parallel_world_size() 40 | 41 | if total_nnz % sp_world_size == 0: 42 | pad_size = 0 43 | else: 44 | pad_size = sp_world_size - total_nnz % sp_world_size 45 | 46 | if pad_size > 0: 47 | if unpad_tokens.ndim == 1: 48 | unpad_tokens = F.pad(unpad_tokens, (0, pad_size)) 49 | elif unpad_tokens.ndim == 2: 50 | unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size)) 51 | else: 52 | raise NotImplementedError(f'Padding dim {unpad_tokens.ndim()} is not supported') 53 | 54 | return unpad_tokens 55 | -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contain small python utility functions 16 | """ 17 | 18 | from typing import Dict 19 | from types import SimpleNamespace 20 | 21 | 22 | def union_two_dict(dict1: Dict, dict2: Dict): 23 | """Union two dict. Will throw an error if there is an item not the same object with the same key. 24 | 25 | Args: 26 | dict1: 27 | dict2: 28 | 29 | Returns: 30 | 31 | """ 32 | for key, val in dict2.items(): 33 | if key in dict1: 34 | assert dict2[key] == dict1[key], \ 35 | f'{key} in meta_dict1 and meta_dict2 are not the same object' 36 | dict1[key] = val 37 | 38 | return dict1 39 | 40 | 41 | def append_to_dict(data: Dict, new_data: Dict): 42 | for key, val in new_data.items(): 43 | if key not in data: 44 | data[key] = [] 45 | data[key].append(val) 46 | 47 | 48 | class NestedNamespace(SimpleNamespace): 49 | 50 | def __init__(self, dictionary, **kwargs): 51 | super().__init__(**kwargs) 52 | for key, value in dictionary.items(): 53 | if isinstance(value, dict): 54 | self.__setattr__(key, NestedNamespace(value)) 55 | else: 56 | self.__setattr__(key, value) 57 | -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
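# Tiny self-contained sketch for verl/utils/py_functional.py above: append_to_dict
# accumulates per-step metrics into lists, and union_two_dict merges two dicts while
# asserting that shared keys agree. Values are illustrative.
from verl.utils.py_functional import append_to_dict, union_two_dict

metrics = {}
append_to_dict(metrics, {'loss': 0.9})
append_to_dict(metrics, {'loss': 0.7})
assert metrics == {'loss': [0.9, 0.7]}

merged = union_two_dict({'seed': 1}, {'seed': 1, 'epochs': 3})
assert merged == {'seed': 1, 'epochs': 3}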
14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | 17 | 18 | def extract_solution(solution_str, method='strict'): 19 | assert method in ['strict', 'flexible'] 20 | 21 | if method == 'strict': 22 | # this also tests the formatting of the model 23 | solution = re.search("#### (\\-?[0-9\\.\\,]+)", solution_str) 24 | if solution is None: 25 | final_answer = None 26 | else: 27 | final_answer = solution.group(0) 28 | final_answer = final_answer.split('#### ')[1].replace(',', '').replace('$', '') 29 | elif method == 'flexible': 30 | answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str) 31 | final_answer = None 32 | if len(answer) == 0: 33 | # no reward is there is no answer 34 | pass 35 | else: 36 | invalid_str = ['', '.'] 37 | # find the last number that is not '.' 38 | for final_answer in reversed(answer): 39 | if final_answer not in invalid_str: 40 | break 41 | return final_answer 42 | 43 | 44 | def compute_score(solution_str, ground_truth, method='strict', format_score=0., score=1.): 45 | """The scoring function for GSM8k. 46 | 47 | Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024. 48 | 49 | Args: 50 | solution_str: the solution text 51 | ground_truth: the ground truth 52 | method: the method to extract the solution, choices are 'strict' and 'flexible' 53 | format_score: the score for the format 54 | score: the score for the correct answer 55 | """ 56 | answer = extract_solution(solution_str=solution_str, method=method) 57 | if answer is None: 58 | return 0 59 | else: 60 | if answer == ground_truth: 61 | return score 62 | else: 63 | return format_score -------------------------------------------------------------------------------- /verl/utils/reward_score/multiply.py: -------------------------------------------------------------------------------- 1 | import re 2 | import random 3 | 4 | 5 | def extract_solution(solution_str): 6 | # Remove everything before the first "Assistant:" 7 | if "Assistant:" in solution_str: 8 | solution_str = solution_str.split("Assistant:", 1)[1] 9 | else: 10 | return None 11 | 12 | answer_pattern = r'(.*?)' 13 | match = re.finditer(answer_pattern, solution_str) 14 | matches = list(match) 15 | if matches: 16 | final_answer = matches[-1].group(1).strip() 17 | else: 18 | final_answer = None 19 | if final_answer is not None: 20 | try: 21 | int_final_answer = int(final_answer) 22 | except ValueError: 23 | final_answer = None 24 | return final_answer 25 | 26 | 27 | def compute_score(solution_str, ground_truth, method='strict', format_score=0.1, score=1.): 28 | """The scoring function for GSM8k. 29 | 30 | Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024. 
31 | 32 | Args: 33 | solution_str: the solution text 34 | ground_truth: the ground truth 35 | method: the method to extract the solution, choices are 'strict' and 'flexible' 36 | format_score: the score for the format 37 | score: the score for the correct answer 38 | """ 39 | answer = extract_solution(solution_str=solution_str) 40 | do_print = random.randint(1, 64) == 1 41 | if do_print: 42 | print(f"--------------------------------") 43 | print(f"Ground truth: {ground_truth} | Extracted answer: {answer}") 44 | print(f"Solution string: {solution_str}") 45 | 46 | if answer is None: 47 | if do_print: 48 | print(f"No answer found") 49 | return 0 50 | else: 51 | if int(answer) == int(ground_truth): 52 | if do_print: 53 | print(f"Correct answer: {answer}") 54 | return score 55 | else: 56 | if do_print: 57 | print(f"Incorrect answer {answer} | Ground truth: {ground_truth}") 58 | return format_score 59 | -------------------------------------------------------------------------------- /verl/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utils for tokenization.""" 15 | import warnings 16 | 17 | __all__ = ['hf_tokenizer'] 18 | 19 | 20 | def set_pad_token_id(tokenizer): 21 | """Set pad_token_id to eos_token_id if it is None. 22 | 23 | Args: 24 | tokenizer (transformers.PreTrainedTokenizer): The tokenizer to be set. 25 | 26 | """ 27 | if tokenizer.pad_token_id is None: 28 | tokenizer.pad_token_id = tokenizer.eos_token_id 29 | warnings.warn(f'tokenizer.pad_token_id is None. Now set to {tokenizer.eos_token_id}') 30 | if tokenizer.pad_token is None: 31 | tokenizer.pad_token = tokenizer.eos_token 32 | warnings.warn(f'tokenizer.pad_token is None. Now set to {tokenizer.eos_token}') 33 | 34 | 35 | def hf_tokenizer(name_or_path, correct_pad_token=True, correct_gemma2=True, **kwargs): 36 | """Create a huggingface pretrained tokenizer. 37 | 38 | Args: 39 | name (str): The name of the tokenizer. 40 | correct_pad_token (bool): Whether to correct the pad token id. 41 | correct_gemma2 (bool): Whether to correct the gemma2 tokenizer. 42 | **kwargs: The keyword arguments for the tokenizer. 43 | 44 | Returns: 45 | transformers.PreTrainedTokenizer: The pretrained tokenizer. 46 | 47 | """ 48 | from transformers import AutoTokenizer 49 | if correct_gemma2 and isinstance(name_or_path, str) and 'gemma-2-2b-it' in name_or_path: 50 | # the EOS token in gemma2 is ambiguious, which may worsen RL performance. 51 | # https://huggingface.co/google/gemma-2-2b-it/commit/17a01657f5c87135bcdd0ec7abb4b2dece04408a 52 | warnings.warn('Found gemma-2-2b-it tokenizer. 
Set eos_token and eos_token_id to and 107.') 53 | kwargs['eos_token'] = '' 54 | kwargs['eos_token_id'] = 107 55 | tokenizer = AutoTokenizer.from_pretrained(name_or_path, **kwargs) 56 | if correct_pad_token: 57 | set_pad_token_id(tokenizer) 58 | return tokenizer -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Adapted from Cruise. 16 | """ 17 | 18 | import torch 19 | 20 | from typing import Union 21 | 22 | HALF_LIST = [16, "16", "fp16", "float16"] 23 | FLOAT_LIST = [32, "32", "fp32", "float32"] 24 | BFLOAT_LIST = ["bf16", "bfloat16"] 25 | 26 | 27 | class PrecisionType(object): 28 | """Type of precision used. 29 | 30 | >>> PrecisionType.HALF == 16 31 | True 32 | >>> PrecisionType.HALF in (16, "16") 33 | True 34 | """ 35 | 36 | HALF = "16" 37 | FLOAT = "32" 38 | FULL = "64" 39 | BFLOAT = "bf16" 40 | MIXED = "mixed" 41 | 42 | @staticmethod 43 | def supported_type(precision: Union[str, int]) -> bool: 44 | return any(x == precision for x in PrecisionType) 45 | 46 | @staticmethod 47 | def supported_types() -> list[str]: 48 | return [x.value for x in PrecisionType] 49 | 50 | @staticmethod 51 | def is_fp16(precision): 52 | return precision in HALF_LIST 53 | 54 | @staticmethod 55 | def is_fp32(precision): 56 | return precision in FLOAT_LIST 57 | 58 | @staticmethod 59 | def is_bf16(precision): 60 | return precision in BFLOAT_LIST 61 | 62 | @staticmethod 63 | def to_dtype(precision): 64 | if precision in HALF_LIST: 65 | return torch.float16 66 | elif precision in FLOAT_LIST: 67 | return torch.float32 68 | elif precision in BFLOAT_LIST: 69 | return torch.bfloat16 70 | else: 71 | raise RuntimeError(f"unexpected precision: {precision}") 72 | 73 | @staticmethod 74 | def to_str(precision): 75 | if precision == torch.float16: 76 | return 'fp16' 77 | elif precision == torch.float32: 78 | return 'fp32' 79 | elif precision == torch.bfloat16: 80 | return 'bf16' 81 | else: 82 | raise RuntimeError(f"unexpected precision: {precision}") 83 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
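# Small self-contained sketch for verl/utils/torch_dtypes.py above: string and int
# spellings of a precision map to the corresponding torch dtype and back.
import torch

from verl.utils.torch_dtypes import PrecisionType

assert PrecisionType.to_dtype('bf16') == torch.bfloat16
assert PrecisionType.to_dtype(16) == torch.float16
assert PrecisionType.to_str(torch.float32) == 'fp32'
assert PrecisionType.is_bf16('bfloat16')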
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for Actor 16 | """ 17 | from abc import ABC, abstractmethod 18 | from typing import Iterable, Dict 19 | 20 | from verl import DataProto 21 | import torch 22 | 23 | __all__ = ['BasePPOActor'] 24 | 25 | 26 | class BasePPOActor(ABC): 27 | 28 | def __init__(self, config): 29 | """The base class for PPO actor 30 | 31 | Args: 32 | config (DictConfig): a config passed to the PPOActor. We expect the type to be 33 | DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general. 34 | """ 35 | super().__init__() 36 | self.config = config 37 | 38 | @abstractmethod 39 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 40 | """Compute logits given a batch of data. 41 | 42 | Args: 43 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 44 | ```attention_mask``` and ```position_ids```. 
45 | 46 | Returns: 47 | DataProto: a DataProto containing the key ```log_probs``` 48 | 49 | 50 | """ 51 | pass 52 | 53 | @abstractmethod 54 | def update_policy(self, data: DataProto) -> Dict: 55 | """Update the policy with an iterator of DataProto 56 | 57 | Args: 58 | data (DataProto): an iterator over the DataProto that returns by 59 | ```make_minibatch_iterator``` 60 | 61 | Returns: 62 | Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model 63 | such as ```loss```, ```grad_norm```, etc,. 64 | 65 | """ 66 | pass 67 | -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for reward model 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | from verl import DataProto 21 | 22 | 23 | class BasePPORewardModel(ABC): 24 | 25 | def __init__(self, config): 26 | self.config = config 27 | 28 | @abstractmethod 29 | def compute_reward(self, data: DataProto) -> DataProto: 30 | """Compute the reward given input_ids. The underlying transformer should output a tensor with shape 31 | [batch_size, sequence_length], and the value at the [EOS] position is gathered as the sequence reward. 32 | 33 | Args: 34 | data: must contain keys "input_ids", "attention_mask" and "position_ids". 35 | - input_ids: [batch_size, sequence_length] 36 | - attention_mask: [batch_size, sequence_length] 37 | - position_ids: [batch_size, sequence_length] 38 | 39 | Returns: a DataProto containing "reward". Only the [EOS] position holds the reward; 40 | all other positions should have zero reward. Note that this may change in the future if we use 41 | dense rewards, so the interface is kept general. 42 | - reward: [batch_size, sequence_length]. 43 | 44 | """ 45 | pass 46 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """The base class for rollout. 27 | 28 | Note: 29 | A rollout worker consumes prompts passed in as DataProto and generates responses; 30 | see ```generate_sequences``` for the expected interface. 31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .vllm_rollout import vLLMRollout -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available 16 | 17 | from .base import BaseShardingManager 18 | from .fsdp_ulysses import FSDPUlyssesShardingManager 19 | 20 | # The Megatron sharding manager requires both Megatron-Core and vLLM to be installed. 21 | if is_megatron_core_available() and is_vllm_available(): 22 | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager 23 | else: 24 | AllGatherPPModel = None 25 | MegatronVLLMShardingManager = None 26 | 27 | if is_vllm_available(): 28 | from .fsdp_vllm import FSDPVLLMShardingManager 29 | else: 30 | FSDPVLLMShardingManager = None 31 | -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | --------------------------------------------------------------------------------
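Appendix (illustrative only, not part of the repository): the abstract interfaces above are small, so a minimal sketch can show how they fit together. The sketch below subclasses BasePPOActor; the ToyActor class, its module/optimizer wiring, and the vanilla policy-gradient loss are hypothetical stand-ins and do not reflect verl's DataParallelPPOActor.

import torch
from typing import Dict

from verl import DataProto
from verl.workers.actor import BasePPOActor


class ToyActor(BasePPOActor):
    """Hypothetical actor used only to illustrate the BasePPOActor contract."""

    def __init__(self, config, module: torch.nn.Module, optimizer: torch.optim.Optimizer):
        super().__init__(config)
        self.module = module
        self.optimizer = optimizer

    def compute_log_prob(self, data: DataProto) -> torch.Tensor:
        # DataProto carries its tensors in the `batch` TensorDict.
        input_ids = data.batch['input_ids']
        attention_mask = data.batch['attention_mask']
        position_ids = data.batch['position_ids']
        logits = self.module(input_ids=input_ids,
                             attention_mask=attention_mask,
                             position_ids=position_ids).logits
        # Log probability of each observed next token.
        log_probs = torch.log_softmax(logits[:, :-1], dim=-1)
        return torch.gather(log_probs, dim=-1, index=input_ids[:, 1:].unsqueeze(-1)).squeeze(-1)

    def update_policy(self, data: DataProto) -> Dict:
        # One vanilla policy-gradient step; the real DataParallelPPOActor instead
        # optimizes the clipped PPO objective over mini-batches.
        log_prob = self.compute_log_prob(data)
        advantages = data.batch['advantages']  # assumed to be aligned with log_prob
        loss = -(log_prob * advantages).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return {'actor/pg_loss': loss.item()}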
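In the same spirit, a sketch of a BasePPORewardModel subclass that follows the compute_reward contract: per-token scores come from a hypothetical `scorer` module, and every position except the last attended ([EOS]) token is zeroed out. The use of DataProto.from_dict to wrap the result is an assumption about the protocol helper, not prescribed by the base class.

import torch

from verl import DataProto
from verl.workers.reward_model import BasePPORewardModel


class ToyRewardModel(BasePPORewardModel):
    """Hypothetical reward model used only to illustrate the compute_reward contract."""

    def __init__(self, config, scorer: torch.nn.Module):
        super().__init__(config)
        self.scorer = scorer

    def compute_reward(self, data: DataProto) -> DataProto:
        input_ids = data.batch['input_ids']
        attention_mask = data.batch['attention_mask']
        # Per-token scores from the hypothetical scorer: [batch_size, sequence_length].
        scores = self.scorer(input_ids=input_ids, attention_mask=attention_mask)
        # Keep only the value at the last attended ([EOS]) position; zero elsewhere.
        batch_index = torch.arange(scores.size(0), device=scores.device)
        eos_index = attention_mask.sum(dim=-1) - 1
        reward = torch.zeros_like(scores)
        reward[batch_index, eos_index] = scores[batch_index, eos_index]
        return DataProto.from_dict(tensors={'reward': reward})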
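Finally, BaseShardingManager is just a context-manager protocol with pre/post data hooks. A sketch of how a caller might wrap rollout generation with one; the `manager` and `rollout` arguments are assumed to be instances of BaseShardingManager and BaseRollout subclasses.

from verl import DataProto


def generate_with_sharding(manager, rollout, prompts: DataProto) -> DataProto:
    # __enter__ typically reshards/loads weights into the inference engine and
    # __exit__ releases them; the base class makes both no-ops.
    with manager:
        prompts = manager.preprocess_data(prompts)
        output = rollout.generate_sequences(prompts)
        output = manager.postprocess_data(output)
    return output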