├── .gitignore
├── LICENSE
├── Notice.txt
├── README.md
├── data
    └── kk
    │   └── instruct
    │       ├── 3ppl
    │           ├── test.parquet
    │           └── train.parquet
    │       ├── 4ppl
    │           ├── test.parquet
    │           └── train.parquet
    │       ├── 5ppl
    │           ├── test.parquet
    │           └── train.parquet
    │       ├── 6ppl
    │           ├── test.parquet
    │           └── train.parquet
    │       └── 7ppl
    │           ├── test.parquet
    │           └── train.parquet
├── docker
    ├── Dockerfile.ngc.vllm
    └── Dockerfile.vemlp.vllm.te
├── docs
    ├── Makefile
    ├── README.md
    ├── _static
    │   └── logo.png
    ├── advance
    │   ├── dpo_extension.rst
    │   ├── fsdp_extension.rst
    │   ├── megatron_extension.rst
    │   └── placement.rst
    ├── conf.py
    ├── examples
    │   ├── config.rst
    │   ├── gsm8k_example.rst
    │   └── ppo_code_architecture.rst
    ├── experiment
    │   └── ppo.rst
    ├── faq
    │   └── faq.rst
    ├── index.rst
    ├── preparation
    │   ├── prepare_data.rst
    │   └── reward_function.rst
    ├── requirements-docs.txt
    ├── start
    │   ├── install.rst
    │   └── quickstart.rst
    └── workers
    │   ├── fsdp_workers.rst
    │   ├── megatron_workers.rst
    │   └── ray_trainer.rst
├── eval_kk
    ├── compute_score.py
    ├── eval.sh
    ├── kk_processor.py
    ├── kk_prompt.py
    └── main_eval_instruct.py
├── examples
    ├── data_preprocess
    │   ├── arth.py
    │   ├── countdown.py
    │   ├── full_hh_rlhf.py
    │   ├── gsm8k.py
    │   ├── hellaswag.py
    │   ├── kk.py
    │   ├── math_dataset.py
    │   └── multiply.py
    ├── generation
    │   └── run_deepseek_v2_lite_math.sh
    ├── grpo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_seq_balance.sh
    │   ├── run_qwen2-7b.sh
    │   └── run_qwen2-7b_seq_balance.sh
    ├── ppo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_sp2.sh
    │   ├── run_deepseek_full_hh_rlhf.sh
    │   ├── run_deepseek_math_gsm8k_megatron.sh
    │   ├── run_deepseek_megatron.sh
    │   ├── run_gemma.sh
    │   ├── run_qwen2-7b.sh
    │   ├── run_qwen2-7b_rm.sh
    │   ├── run_qwen2-7b_rm_seq_balance.sh
    │   ├── run_qwen2-7b_seq_balance.sh
    │   ├── run_qwen2.5-32b.sh
    │   └── verl_getting_started.ipynb
    ├── ray
    │   └── tutorial.ipynb
    ├── sft
    │   └── gsm8k
    │   │   ├── run_deepseek_6b7.sh
    │   │   ├── run_gemma_2b.sh
    │   │   └── run_gemma_7b.sh
    └── split_placement
    │   ├── README.md
    │   ├── config
    │       └── ppo_trainer_split.yaml
    │   ├── main_ppo_split.py
    │   ├── run_deepseek7b_llm.sh
    │   └── split_monkey_patch.py
├── main_grpo.sh
├── math_eval
    ├── aime_2021_2024.jsonl
    ├── amc.jsonl
    ├── auto_test_aime.sh
    ├── test_aime.py
    ├── test_aime.sh
    ├── test_amc.py
    └── test_amc.sh
├── patches
    └── megatron_v4.patch
├── pics
    ├── response.png
    ├── response_mean_length.png
    ├── response_mean_length_v2.png
    ├── teaser.png
    └── test_score_plot_v1.jpg
├── pyproject.toml
├── requirements.txt
├── scripts
    ├── curriculum.sh
    ├── format.sh
    ├── train_grpo_4gpu_7Binstruct.sh
    ├── train_ppo_3B_4gpu.sh
    ├── train_ppo_7B_4gpu.sh
    └── train_reinforce_plus_4gpu_7Binstruct.sh
├── setup.py
├── tests
    ├── __init__.py
    ├── e2e
    │   ├── __init__.py
    │   ├── arithmetic_sequence
    │   │   ├── data
    │   │   │   ├── create_dataset.py
    │   │   │   ├── test.parquet
    │   │   │   └── train.parquet
    │   │   ├── model
    │   │   │   ├── config.json
    │   │   │   ├── create_model_tokenizer.py
    │   │   │   ├── generation_config.json
    │   │   │   ├── model.safetensors
    │   │   │   └── tokenizer_config.json
    │   │   └── rl
    │   │   │   ├── README.md
    │   │   │   ├── config
    │   │   │       └── ray_trainer.yaml
    │   │   │   └── main_trainer.py
    │   ├── check_results.py
    │   ├── envs
    │   │   ├── __init__.py
    │   │   └── digit_completion
    │   │   │   ├── __init__.py
    │   │   │   ├── task.py
    │   │   │   └── tokenizer.py
    │   ├── run_qwen_gsm8k_function_rm.sh
    │   ├── run_qwen_gsm8k_function_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_model_rm.sh
    │   ├── run_qwen_gsm8k_model_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_model_rm_seq_balance.sh
    │   ├── run_qwen_gsm8k_model_rm_ulysses.sh
    │   ├── run_ray_trainer.sh
    │   └── run_ray_trainer_rmpad.sh
    ├── gpu_utility
    │   ├── test_memory_buffers.py
    │   ├── test_ops.py
    │   └── test_torch_functional.py
    ├── model
    │   ├── test_transformer.py
    │   └── test_transformers_ulysses.py
    ├── ray
    │   ├── check_worker_alive
    │   │   └── main.py
    │   ├── detached_worker
    │   │   ├── README.md
    │   │   ├── client.py
    │   │   ├── run.sh
    │   │   └── server.py
    │   ├── test_check_worker_alive.py
    │   ├── test_colocated_workers.py
    │   ├── test_data_transfer.py
    │   ├── test_driverfunc_to_worker.py
    │   ├── test_high_level_scheduling_api.py
    │   ├── test_ray_local_envs.py
    │   ├── test_rvdz.py
    │   ├── test_worker_group_basics.py
    │   └── test_worker_group_torch.py
    ├── rollout
    │   ├── run_fsdp_vllm.py
    │   └── test_vllm_hf_loader.py
    ├── sanity
    │   ├── check_license.py
    │   └── test_import.py
    ├── utility
    │   └── test_tensor_dict_utilities.py
    └── verl
    │   └── utils
    │       └── dataset
    │           ├── test_rl_dataset.py
    │           ├── test_rm_dataset.py
    │           └── test_sft_dataset.py
└── verl
    ├── __init__.py
    ├── models
        ├── README.md
        ├── __init__.py
        ├── llama
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── llama_loader.py
        │   │       └── llama_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_llama_megatron.py
        ├── registry.py
        ├── transformers
        │   ├── __init__.py
        │   ├── llama.py
        │   ├── monkey_patch.py
        │   └── qwen2.py
        └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller
        ├── __init__.py
        ├── base
        │   ├── __init__.py
        │   ├── decorator.py
        │   ├── megatron
        │   │   ├── __init__.py
        │   │   ├── worker.py
        │   │   └── worker_group.py
        │   ├── register_center
        │   │   ├── __init__.py
        │   │   └── ray.py
        │   ├── worker.py
        │   └── worker_group.py
        ├── ray
        │   ├── __init__.py
        │   ├── base.py
        │   └── megatron.py
        └── version
        │   └── version
    ├── third_party
        ├── __init__.py
        └── vllm
        │   ├── __init__.py
        │   ├── vllm_v_0_3_1
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── tokenizer.py
        │       ├── weight_loaders.py
        │       └── worker.py
        │   ├── vllm_v_0_4_2
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   ├── vllm_v_0_5_4
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   └── vllm_v_0_6_3
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
    ├── trainer
        ├── __init__.py
        ├── config
        │   ├── evaluation.yaml
        │   ├── generation.yaml
        │   ├── ppo_megatron_trainer.yaml
        │   ├── ppo_trainer.yaml
        │   └── sft_trainer.yaml
        ├── fsdp_sft_trainer.py
        ├── main_eval.py
        ├── main_generation.py
        ├── main_ppo.py
        ├── ppo
        │   ├── __init__.py
        │   ├── core_algos.py
        │   └── ray_trainer.py
        └── runtime_env.yaml
    ├── utils
        ├── __init__.py
        ├── config.py
        ├── dataset
        │   ├── README.md
        │   ├── __init__.py
        │   ├── rl_dataset.py
        │   ├── rm_dataset.py
        │   └── sft_dataset.py
        ├── debug
        │   ├── __init__.py
        │   ├── performance.py
        │   └── trajectory_tracker.py
        ├── distributed.py
        ├── flops_counter.py
        ├── fs.py
        ├── fsdp_utils.py
        ├── hdfs_io.py
        ├── import_utils.py
        ├── logger
        │   ├── __init__.py
        │   └── aggregate_logger.py
        ├── logging_utils.py
        ├── megatron
        │   ├── __init__.py
        │   ├── memory.py
        │   ├── optimizer.py
        │   ├── optimizer_config.py
        │   ├── pipeline_parallel.py
        │   ├── sequence_parallel.py
        │   └── tensor_parallel.py
        ├── megatron_utils.py
        ├── memory_buffer.py
        ├── model.py
        ├── py_functional.py
        ├── ray_utils.py
        ├── rendezvous
        │   ├── __init__.py
        │   └── ray_backend.py
        ├── reward_score
        │   ├── __init__.py
        │   ├── countdown.py
        │   ├── gsm8k.py
        │   ├── kk.py
        │   ├── math.py
        │   └── multiply.py
        ├── seqlen_balancing.py
        ├── tokenizer.py
        ├── torch_dtypes.py
        ├── torch_functional.py
        ├── tracking.py
        └── ulysses.py
    ├── version
        └── version
    └── workers
        ├── __init__.py
        ├── actor
            ├── __init__.py
            ├── base.py
            ├── dp_actor.py
            └── megatron_actor.py
        ├── critic
            ├── __init__.py
            ├── base.py
            ├── dp_critic.py
            └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_model
            ├── __init__.py
            ├── base.py
            └── megatron
            │   ├── __init__.py
            │   └── reward_model.py
        ├── rollout
            ├── __init__.py
            ├── base.py
            ├── hf_rollout.py
            ├── naive
            │   ├── __init__.py
            │   └── naive_rollout.py
            ├── tokenizer.py
            └── vllm_rollout
            │   ├── __init__.py
            │   └── vllm_rollout.py
        └── sharding_manager
            ├── __init__.py
            ├── base.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            └── megatron_vllm.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | **/*.pt
  2 | **/checkpoints
  3 | **/wget-log
  4 | **/_build/
  5 | **/*.ckpt
  6 | **/outputs
  7 | **/*.tar.gz
  8 | **/playground
  9 | **/wandb
 10 | 
 11 | # Byte-compiled / optimized / DLL files
 12 | __pycache__/
 13 | *.py[cod]
 14 | *$py.class
 15 | dataset/*
 16 | tensorflow/my_graph/*
 17 | .idea/
 18 | # C extensions
 19 | *.so
 20 | 
 21 | # Distribution / packaging
 22 | .Python
 23 | env/
 24 | build/
 25 | develop-eggs/
 26 | dist/
 27 | downloads/
 28 | eggs/
 29 | .eggs/
 30 | lib/
 31 | lib64/
 32 | parts/
 33 | sdist/
 34 | var/
 35 | *.egg-info/
 36 | .installed.cfg
 37 | *.egg
 38 | 
 39 | # PyInstaller
 40 | #  Usually these files are written by a python script from a template
 41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 42 | *.manifest
 43 | *.spec
 44 | 
 45 | # Installer logs
 46 | pip-log.txt
 47 | pip-delete-this-directory.txt
 48 | 
 49 | # Unit test / coverage reports
 50 | htmlcov/
 51 | .tox/
 52 | .coverage
 53 | .coverage.*
 54 | .cache
 55 | nosetests.xml
 56 | coverage.xml
 57 | *,cover
 58 | .hypothesis/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # IPython Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # celery beat schedule file
 88 | celerybeat-schedule
 89 | 
 90 | # dotenv
 91 | .env
 92 | 
 93 | # virtualenv
 94 | venv/
 95 | ENV/
 96 | 
 97 | # Spyder project settings
 98 | .spyderproject
 99 | 
100 | # Rope project settings
101 | .ropeproject
102 | 
103 | # vscode
104 | .vscode
105 | 
106 | # Mac
107 | .DS_Store
108 | 
109 | # output logs
110 | tests/e2e/toy_examples/deepspeed/synchronous/output.txt
111 | 
112 | # vim
113 | *.swp
114 | 


--------------------------------------------------------------------------------
/Notice.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates 


--------------------------------------------------------------------------------
/data/kk/instruct/3ppl/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/3ppl/test.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/3ppl/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/3ppl/train.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/4ppl/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/4ppl/test.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/4ppl/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/4ppl/train.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/5ppl/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/5ppl/test.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/5ppl/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/5ppl/train.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/6ppl/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/6ppl/test.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/6ppl/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/6ppl/train.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/7ppl/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/7ppl/test.parquet


--------------------------------------------------------------------------------
/data/kk/instruct/7ppl/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/data/kk/instruct/7ppl/train.parquet


--------------------------------------------------------------------------------
/docker/Dockerfile.ngc.vllm:
--------------------------------------------------------------------------------
 1 | FROM nvcr.io/nvidia/pytorch:24.05-py3
 2 | 
 3 | # uninstall nv-pytorch fork
 4 | RUN pip3 uninstall pytorch-quantization \
 5 |      pytorch-triton \
 6 |      torch \
 7 |      torch-tensorrt \
 8 |      torchvision \
 9 |      xgboost transformer_engine flash_attn \
10 |      apex megatron-core -y
11 | 
12 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
13 | 
14 | # make sure torch version is kept
15 | RUN pip3 install --no-cache-dir \
16 |     "torch==2.4.0" \
17 |     accelerate \
18 |     codetiming \
19 |     datasets \
20 |     dill \
21 |     hydra-core \
22 |     numpy \
23 |     pybind11 \
24 |     tensordict \
25 |     "transformers<=4.46.0"
26 | 
27 | # ray is installed via vllm
28 | RUN pip3 install --no-cache-dir vllm==0.6.3
29 | 
30 | # we choose flash-attn v2.7.0 or v2.7.2 which contain pre-built wheels
31 | RUN pip3 install --no-cache-dir --no-build-isolation flash-attn==2.7.0.post2
32 | 
33 | # install apex, set MAX_JOBS to avoid OOMs
34 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
35 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
36 |     git+https://github.com/NVIDIA/apex
37 | 
38 | # install Transformer Engine, which requires FA 2.5.8
39 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation
40 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7
41 | 
42 | # Pin wandb to v0.18 since v0.19.1 is released with ImportError
43 | RUN pip3 install wandb==0.18.7 py-spy
44 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.vemlp.vllm.te:
--------------------------------------------------------------------------------
 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE .
 2 | 
 3 | # the one in docker.io is an alias for the one veturbo
 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124
 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base
 6 | 
 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed
 8 | # unset for now
 9 | RUN pip3 config unset global.index-url
10 | 
11 | # transformers 4.47.0 contains the following bug:
12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask'
13 | RUN pip3 install --no-cache-dir \
14 |     torch==2.4.0 \
15 |     accelerate \
16 |     codetiming \
17 |     dill \
18 |     hydra-core \
19 |     numpy \
20 |     pybind11 \
21 |     tensordict \
22 |     "transformers <= 4.46.0"
23 | 
24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation
25 | 
26 | # vllm depends on ray, and veRL does not support ray > 2.37
27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10
28 | 
29 | # install apex
30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
31 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
32 |     git+https://github.com/NVIDIA/apex
33 | 
34 | # install Transformer Engine
35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req
36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM
37 | # - cudnn is required by TransformerEngine
38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \
39 | #     pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation
41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7
42 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = verl
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # veRL documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d _build/html/
18 | ```
19 | Launch your browser and open localhost:8000.


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/advance/megatron_extension.rst:
--------------------------------------------------------------------------------
 1 | Add models with the Megatron-LM backend
 2 | =========================================
 3 | 
 4 | Model
 5 | -----------
 6 | 
 7 | The most challenging aspect to use the Megatron-LM backend is implementing
 8 | the models for training. Currently, we implement Llama model that
 9 | support data parallelism, tensor parallelism, pipeline parallelism (also
10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) on Llama
11 | model, which can be found in `modeling_llama_megatron.py <https://github.com/volcengine/verl/blob/main/verl/models/llama/megatron/modeling_llama_megatron.py>`_.
12 | 
13 | To support other model, users are required to implement:
14 | 
15 | 1. Implemnt a model similar to ``modeling_llama_megatron.py`` that satisfy the
16 |    parallelism requirements of Megatron-LM. Then register your model in
17 |    the `registry.py <https://github.com/volcengine/verl/blob/main/verl/models/registry.py>`_.
18 | 2. Checkpoint utils that can load full checkpoint (e.g. huggingface
19 |    checkpoint) to partitioned models during the runtime. Then register
20 |    your loader to ``weight_loader_registry`` in `weight_loader_registry.py <https://github.com/volcengine/verl/blob/main/verl/models/weight_loader_registry.py>`_.
21 | 3. Weight loader that synchronize the weight from Megatron to rollout
22 |    (vLLM) model. Note that both the actor model and rollout model are
23 |    partitioned during runtime. So, it's advisable to map the model name
24 |    in actor model implementation. Otherwise, you may need an additional
25 |    name mapping and even weight transformation. The weight loader implementation
26 |    is in `megatron_weight_loaders.py <https://github.com/volcengine/verl/blob/main/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py>`_.


--------------------------------------------------------------------------------
/docs/advance/placement.rst:
--------------------------------------------------------------------------------
 1 | Ray API Design Tutorial
 2 | =======================================
 3 | 
 4 | We provide a tutorial for our Ray API design, including:
 5 | 
 6 | - Ray basic concepts
 7 | - Resource Pool and RayWorkerGroup
 8 | - Data Dispatch, Execution and Collection
 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool
10 | 
11 | See details in `tutorial.ipynb <https://github.com/volcengine/verl/blob/main/examples/ray/tutorial.ipynb>`_.


--------------------------------------------------------------------------------
/docs/faq/faq.rst:
--------------------------------------------------------------------------------
 1 | Frequently Asked Questions
 2 | ====================================
 3 | 
 4 | Ray related
 5 | ------------
 6 | 
 7 | How to add breakpoint for debugging with distributed Ray?
 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 9 | 
10 | Please checkout the official debugging guide from Ray: https://docs.ray.io/en/latest/ray-observability/ray-distributed-debugger.html
11 | 
12 | 
13 | Distributed training
14 | ------------------------
15 | 
16 | How to run multi-node post-training with Ray?
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18 | 
19 | You can start a ray cluster and submit a ray job, following the official guide from Ray: https://docs.ray.io/en/latest/ray-core/starting-ray.html
20 | 


--------------------------------------------------------------------------------
/docs/preparation/reward_function.rst:
--------------------------------------------------------------------------------
 1 | Implement Reward Function for Dataset
 2 | ======================================
 3 | 
 4 | For each dataset, we need to implement a reward function or utilize a reward model to compute the rewards for the generated responses.
 5 | We already pre-implemented some reward functions in `reward_score directory <https://github.com/volcengine/verl/blob/main/verl/utils/reward_score>`_.
 6 | 
 7 | Currently, we support reward functions for GSM8k and MATH datasets. For RLHF datasets (e.g.,
 8 | full_hh_rlhf) and Code Generation (e.g., APPS), we utilize reward model
 9 | and SandBox (will opensource soon) for evaluation respectively.
10 | 
11 | RewardManager
12 | -------------
13 | 
14 | In the entrypoint of the PPO Post-Training script `main_ppo.py <https://github.com/volcengine/verl/blob/main/verl/trainer/main_ppo.py#L33>`_,
15 | we implement a ``RewardManager`` that utilze pre-implemented reward functions to compute the scores for each response.
16 | 
17 | In the ``RewardManager``, we implemented a ``__call__`` function to
18 | compute the score for each response. 
19 | All the reward functions are executed by ``compute_score_fn``.
20 | The input is a ``DataProto``, which includes:
21 | 
22 | - ``input_ids``, ``attention_mask``: ``input_ids`` and ``attention_mask`` after applying
23 |   chat_template, including prompt and response
24 | - ``responses``: response tokens
25 | - ``ground_truth``: The ground truth string of the current prompt.
26 |   Stored in ``non_tensor_batch`` in the ``DataProto``, which should be
27 |   preprocessed in the parquet files.
28 | - ``data_source``: The dataset name of the current prompt. Stored in
29 |   ``non_tensor_batch`` in the ``DataProto``, which should be
30 |   preprocessed in the parquet files.
31 | 
32 | After detokenize the responses, the responses string and the ground
33 | truth string will be input to the ``compute_score_fn`` to compute the
34 | score for each response.
35 | 
36 | Reward Functions
37 | ----------------
38 | We already pre-implemented some reward functions in `reward_score directory <https://github.com/volcengine/verl/blob/main/verl/utils/reward_score>`_.
39 | 
40 | - In the `GSM8k example <https://github.com/volcengine/verl/blob/main/verl/utils/reward_score/gsm8k.py>`_, we
41 |   force the response to output the final answer after four ####, then
42 |   use string matching to compare with the ground truth. If completely
43 |   correct, score 1 point; if the format is correct, score 0.1 points; if
44 |   the format is incorrect, score 0 points.
45 | - In the `MATH example <https://github.com/volcengine/verl/blob/main/verl/utils/reward_score/math.py>`_, we follow
46 |   the implementation in `lm-evaluation-harness repository <https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/hendrycks_math/utils.py>`_.
47 | 


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | # markdown suport
2 | recommonmark
3 | # markdown table suport
4 | sphinx-markdown-tables
5 | 
6 | # theme default rtd
7 | 
8 | # crate-docs-theme
9 | sphinx-rtd-theme


--------------------------------------------------------------------------------
/eval_kk/eval.sh:
--------------------------------------------------------------------------------
 1 | model="xxx" #model path
 2 | config="vllm"
 3 | num_limit=100
 4 | max_token=8192
 5 | ntrain=0
 6 | split="test"
 7 | log_path="log/stage1_1000step"
 8 | 
 9 | mkdir -p ${log_path}
10 | 
11 | for eval_nppl in 2 3 4 5 6 7 8; do
12 |     log_file="${log_path}/${eval_nppl}.log"
13 |     echo "Starting job for eval_nppl: $eval_nppl, logging to $log_file"
14 | 
15 |     CUDA_VISIBLE_DEVICES=$((eval_nppl - 1)) PYTHONUNBUFFERED=1 python main_eval_instruct.py --batch_size 8 --model ${model} --max_token ${max_token} \
16 |     --ntrain ${ntrain} --config ${config} --limit ${num_limit} --split ${split} --temperature 1.0  --top_p 1.0 \
17 |     --problem_type "clean" --eval_nppl ${eval_nppl} > "$log_file" 2>&1 &
18 | done &  


--------------------------------------------------------------------------------
/eval_kk/kk_prompt.py:
--------------------------------------------------------------------------------
 1 | system_instruction='''Your task is to solve a logical reasoning problem. You are given set of statements from which you must logically deduce the identity of a set of characters.
 2 | 
 3 | You must infer the identity of each character. First, explain your reasoning. At the end of your answer, you must clearly state the identity of each character by following the format:
 4 | 
 5 | CONCLUSION:
 6 | (1) ...
 7 | (2) ...
 8 | (3) ...
 9 | '''
10 | 
11 | 
12 | system_instruction_no_reason='''Your task is to solve a logical reasoning problem. You are given set of statements from which you must logically deduce the identity of a set of characters.
13 | 
14 | You must infer the identity of each character. At the end of your answer, you must clearly state the identity of each character by following the format:
15 | 
16 | CONCLUSION:
17 | (1) ...
18 | (2) ...
19 | (3) ...
20 | '''
21 | 
22 | demonstration_2char_no_reason='''### Question: A very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 2 inhabitants: Jack, and Sophia. Jack tells you that Sophia is not a knave. Sophia says that If Jack is a knight then Sophia is a knight. So who is a knight and who is a knave?
23 | ### Answer:
24 | CONCLUSION:
25 | (1) Jack is a knight
26 | (2) Sophia is a knight
27 | '''
28 | 
29 | 
30 | 
31 | demonstration_2char='''### Question: A very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 2 inhabitants: Ella, and Penelope. In a statement by Ella: \"Ella is a knight or Penelope is a knight\". According to Penelope, \"Ella is a knave if and only if Penelope is a knight\". So who is a knight and who is a knave?
32 | ### Answer: Let's think step by step, by considering whether each person is lying and if that leads to contradiction. Assume Ella is a knight. Penelope cannot be a knight, because this would contradict the claim of their own. Penelope cannot be a knave, because this would contradict the false claim of their own. We have exhausted all possibilities for Penelope, so let us go back and reconsider Ella. Assume Ella is a knave. Penelope cannot be a knight, because this would contradict the false claim of Ella. Assume Penelope is a knave. This leads to a feasible solution.
33 | CONCLUSION:
34 | (1) Ella is a knave
35 | (2) Penelope is a knave
36 | '''
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/examples/generation/run_deepseek_v2_lite_math.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_generation \
 2 |     trainer.nnodes=1 \
 3 |     trainer.n_gpus_per_node=8 \
 4 |     data.path=~/data/rlhf/gsm8k/test.parquet \
 5 |     data.prompt_key=prompt \
 6 |     data.n_samples=1 \
 7 |     data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \
 8 |     model.path=deepseek-ai/deepseek-llm-7b-chat \
 9 |     +model.trust_remote_code=True \
10 |     rollout.temperature=1.0 \
11 |     rollout.top_k=50 \
12 |     rollout.top_p=0.7 \
13 |     rollout.prompt_length=2048 \
14 |     rollout.response_length=1024 \
15 |     rollout.tensor_model_parallel_size=2 \
16 |     rollout.gpu_memory_utilization=0.8
17 | 


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.val_batch_size=1312 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=1024 \
11 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
12 |     actor_rollout_ref.actor.optim.lr=1e-6 \
13 |     actor_rollout_ref.model.use_remove_padding=True \
14 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
15 |     actor_rollout_ref.actor.ppo_micro_batch_size=128 \
16 |     actor_rollout_ref.actor.use_kl_loss=True \
17 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
18 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
19 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
23 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
27 |     actor_rollout_ref.rollout.n=5 \
28 |     actor_rollout_ref.ref.log_prob_micro_batch_size=256 \
29 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
30 |     algorithm.kl_ctrl.kl_coef=0.001 \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['console','wandb'] \
33 |     trainer.project_name='verl_grpo_example_gsm8k' \
34 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
35 |     trainer.n_gpus_per_node=8 \
36 |     trainer.nnodes=1 \
37 |     trainer.save_freq=-1 \
38 |     trainer.test_freq=5 \
39 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.val_batch_size=1312 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=512 \
11 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
12 |     actor_rollout_ref.actor.optim.lr=1e-6 \
13 |     actor_rollout_ref.model.use_remove_padding=True \
14 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
15 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
16 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
17 |     actor_rollout_ref.actor.use_kl_loss=True \
18 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
19 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
20 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
21 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
27 |     actor_rollout_ref.rollout.n=5 \
28 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
29 |     algorithm.kl_ctrl.kl_coef=0.001 \
30 |     trainer.critic_warmup=0 \
31 |     trainer.logger=['console','wandb'] \
32 |     trainer.project_name='verl_grpo_example_gsm8k' \
33 |     trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \
34 |     trainer.n_gpus_per_node=8 \
35 |     trainer.nnodes=1 \
36 |     trainer.save_freq=-1 \
37 |     trainer.test_freq=5 \
38 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_qwen2-7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=grpo \
 7 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 8 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 9 |     data.train_batch_size=1024 \
10 |     data.val_batch_size=1312 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size=128 \
18 |     actor_rollout_ref.actor.use_kl_loss=True \
19 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
20 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
29 |     actor_rollout_ref.rollout.n=5 \
30 |     actor_rollout_ref.ref.log_prob_micro_batch_size=256 \
31 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_grpo_example_gsm8k' \
36 |     trainer.experiment_name='qwen2_7b_function_rm' \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=5 \
41 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=grpo \
 7 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 8 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 9 |     data.train_batch_size=1024 \
10 |     data.val_batch_size=1312 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
18 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
19 |     actor_rollout_ref.actor.use_kl_loss=True \
20 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
21 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
22 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
23 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
25 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
29 |     actor_rollout_ref.rollout.n=5 \
30 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_grpo_example_gsm8k' \
35 |     trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \
36 |     +trainer.val_before_train=False \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=5 \
41 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.model.use_remove_padding=True \
13 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
14 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
15 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
16 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
17 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
18 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
19 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
20 |     actor_rollout_ref.rollout.name=vllm \
21 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
22 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
23 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
24 |     critic.optim.lr=1e-5 \
25 |     critic.model.use_remove_padding=True \
26 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
27 |     critic.model.enable_gradient_checkpointing=False \
28 |     critic.ppo_micro_batch_size=32 \
29 |     critic.model.fsdp_config.param_offload=False \
30 |     critic.model.fsdp_config.grad_offload=False \
31 |     critic.model.fsdp_config.optimizer_offload=False \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_example_gsm8k' \
36 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.total_epochs=15 $@
41 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.model.use_remove_padding=True \
13 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
14 |     actor_rollout_ref.actor.ppo_micro_batch_size=128 \
15 |     actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \
16 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
19 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
20 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=256 \
21 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
22 |     actor_rollout_ref.rollout.name=vllm \
23 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
24 |     actor_rollout_ref.ref.log_prob_micro_batch_size=256 \
25 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
26 |     critic.optim.lr=1e-5 \
27 |     critic.ulysses_sequence_parallel_size=2 \
28 |     critic.model.use_remove_padding=True \
29 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
30 |     critic.model.enable_gradient_checkpointing=False \
31 |     critic.ppo_micro_batch_size=64 \
32 |     critic.model.fsdp_config.param_offload=False \
33 |     critic.model.fsdp_config.grad_offload=False \
34 |     critic.model.fsdp_config.optimizer_offload=False \
35 |     algorithm.kl_ctrl.kl_coef=0.001 \
36 |     trainer.critic_warmup=0 \
37 |     trainer.logger=['console','wandb'] \
38 |     trainer.project_name='verl_example_gsm8k' \
39 |     trainer.experiment_name='deepseek_llm_7b_function_rm_sp2' \
40 |     trainer.n_gpus_per_node=8 \
41 |     trainer.nnodes=1 \
42 |     trainer.save_freq=-1 \
43 |     trainer.test_freq=5 \
44 |     trainer.total_epochs=15 $@
45 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet
 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use
 5 | 
 6 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
 7 |     data.train_files="$train_files" \
 8 |     data.val_files="$test_files" \
 9 |     data.train_batch_size=512 \
10 |     data.val_batch_size=128 \
11 |     data.max_prompt_length=128 \
12 |     data.max_response_length=128 \
13 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
17 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \
18 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
19 |     actor_rollout_ref.rollout.name=vllm \
20 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
21 |     actor_rollout_ref.ref.log_prob_micro_batch_size=16 \
22 |     actor_rollout_ref.ref.param_offload=False \
23 |     critic.optim.lr=1e-5 \
24 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
25 |     critic.model.enable_gradient_checkpointing=False \
26 |     critic.ppo_micro_batch_size=16 \
27 |     reward_model.enable=True \
28 |     reward_model.megatron.tensor_model_parallel_size=4 \
29 |     reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \
30 |     reward_model.micro_batch_size=16 \
31 |     reward_model.param_offload=False \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_megatron_full_hh_rlhf_examples' \
36 |     trainer.experiment_name='deepseek_llm_7b_model_rm' \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=5 \
41 |     trainer.total_epochs=100 $@
42 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
12 |     data.train_files="$train_files" \
13 |     data.val_files="$test_files" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=6312 \
16 |     data.max_prompt_length=1024 \
17 |     data.max_response_length=512 \
18 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
21 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size=32 \
27 |     critic.optim.lr=1e-5 \
28 |     critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
29 |     critic.model.enable_gradient_checkpointing=False \
30 |     critic.ppo_micro_batch_size=32 \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_megatron_math_gsm8k_examples' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=100 $@
41 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
11 |     actor_rollout_ref.actor.optim.lr=2e-6 \
12 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
13 |     actor_rollout_ref.actor.ppo_micro_batch_size=64 \
14 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=64 \
15 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
16 |     actor_rollout_ref.rollout.name=vllm \
17 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
18 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
19 |     critic.optim.lr=2e-5 \
20 |     critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
21 |     critic.model.enable_gradient_checkpointing=False \
22 |     critic.ppo_micro_batch_size=64 \
23 |     algorithm.kl_ctrl.kl_coef=0.001 \
24 |     trainer.critic_warmup=0 \
25 |     trainer.logger=['console','wandb'] \
26 |     trainer.project_name='verl_megatron_gsm8k_examples' \
27 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
28 |     trainer.n_gpus_per_node=8 \
29 |     trainer.nnodes=1 \
30 |     trainer.save_freq=-1 \
31 |     trainer.total_epochs=15 \
32 |     +trainer.val_before_train=False $@
33 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_gemma.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=512 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=1024 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=google/gemma-2-2b-it \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.model.use_remove_padding=True \
13 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
14 |     actor_rollout_ref.actor.ppo_micro_batch_size=4 \
15 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
16 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
17 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
18 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=4 \
19 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
20 |     actor_rollout_ref.rollout.name=vllm \
21 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
22 |     actor_rollout_ref.ref.log_prob_micro_batch_size=4 \
23 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
24 |     critic.optim.lr=1e-5 \
25 |     critic.model.use_remove_padding=True \
26 |     critic.model.path=google/gemma-2-2b-it \
27 |     critic.model.enable_gradient_checkpointing=False \
28 |     critic.ppo_micro_batch_size=4 \
29 |     critic.model.fsdp_config.param_offload=False \
30 |     critic.model.fsdp_config.grad_offload=False \
31 |     critic.model.fsdp_config.optimizer_offload=False \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_example' \
36 |     trainer.experiment_name='gemma2b_function_rm' \
37 |     trainer.n_gpus_per_node=2 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=10 \
41 |     trainer.total_epochs=15 $@
42 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2-7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo \
12 |     data.train_files="$train_files" \
13 |     data.val_files="$test_files" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=6312 \
16 |     data.max_prompt_length=1024 \
17 |     data.max_response_length=512 \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.model.use_remove_padding=True \
21 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
22 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
23 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
25 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
26 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \
27 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
28 |     actor_rollout_ref.rollout.name=vllm \
29 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
30 |     actor_rollout_ref.ref.log_prob_micro_batch_size=16 \
31 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
32 |     critic.optim.lr=1e-5 \
33 |     critic.model.use_remove_padding=True \
34 |     critic.model.path=Qwen/Qwen2-7B-Instruct \
35 |     critic.model.enable_gradient_checkpointing=False \
36 |     critic.ppo_micro_batch_size=16 \
37 |     critic.model.fsdp_config.param_offload=False \
38 |     critic.model.fsdp_config.grad_offload=False \
39 |     critic.model.fsdp_config.optimizer_offload=False \
40 |     algorithm.kl_ctrl.kl_coef=0.001 \
41 |     trainer.critic_warmup=0 \
42 |     trainer.logger=['console','wandb'] \
43 |     trainer.project_name='verl_example' \
44 |     trainer.experiment_name='Qwen2-7B-Instruct_function_rm' \
45 |     trainer.n_gpus_per_node=8 \
46 |     trainer.nnodes=1 \
47 |     trainer.save_freq=-1 \
48 |     trainer.test_freq=10 \
49 |     trainer.total_epochs=15 $@
50 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2-7b_rm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # Discliamer: the model used in the script is only for academic example,
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
12 | 
13 | python3 -m verl.trainer.main_ppo \
14 |     data.train_files="$train_files" \
15 |     data.val_files="$test_files" \
16 |     data.train_batch_size=1024 \
17 |     data.val_batch_size=6312 \
18 |     data.max_prompt_length=1024 \
19 |     data.max_response_length=512 \
20 |     data.return_raw_chat=True \
21 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
22 |     actor_rollout_ref.actor.optim.lr=1e-6 \
23 |     actor_rollout_ref.model.use_remove_padding=True \
24 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
25 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
26 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
27 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
28 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
29 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
30 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=16 \
31 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
32 |     actor_rollout_ref.rollout.name=vllm \
33 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
34 |     actor_rollout_ref.ref.log_prob_micro_batch_size=16 \
35 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
36 |     critic.optim.lr=1e-5 \
37 |     critic.model.use_remove_padding=True \
38 |     critic.optim.lr_warmup_steps_ratio=0.05 \
39 |     critic.model.path=Qwen/Qwen2-7B-Instruct \
40 |     critic.model.enable_gradient_checkpointing=False \
41 |     critic.ppo_micro_batch_size=16 \
42 |     critic.model.fsdp_config.param_offload=False \
43 |     critic.model.fsdp_config.grad_offload=False \
44 |     critic.model.fsdp_config.optimizer_offload=False \
45 |     reward_model.enable=True \
46 |     reward_model.model.path=sfairXC/FsfairX-LLaMA3-RM-v0.1\
47 |     reward_model.model.use_remove_padding=True \
48 |     reward_model.model.fsdp_config.param_offload=True \
49 |     reward_model.micro_batch_size=16 \
50 |     algorithm.kl_ctrl.kl_coef=0.001 \
51 |     trainer.critic_warmup=0 \
52 |     trainer.logger=['console','wandb'] \
53 |     trainer.project_name='verl_example' \
54 |     trainer.experiment_name='Qwen2-7B-Instruct_hybrid_rm' \
55 |     trainer.n_gpus_per_node=8 \
56 |     trainer.nnodes=1 \
57 |     trainer.save_freq=-1 \
58 |     trainer.test_freq=5 \
59 |     trainer.total_epochs=15 $@
60 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo \
12 |     data.train_files="$train_files" \
13 |     data.val_files="$test_files" \
14 |     data.train_batch_size=4096 \
15 |     data.val_batch_size=1312 \
16 |     data.max_prompt_length=4096 \
17 |     data.max_response_length=4096 \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.model.use_remove_padding=True \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.ppo_mini_batch_size=512 \
23 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
24 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
25 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
26 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
31 |     actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=24000 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=24000 \
34 |     critic.optim.lr=1e-5 \
35 |     critic.model.use_remove_padding=True \
36 |     critic.model.path=Qwen/Qwen2-7B-Instruct \
37 |     critic.model.enable_gradient_checkpointing=True \
38 |     critic.ppo_max_token_len_per_gpu=98304 \
39 |     critic.model.fsdp_config.param_offload=False \
40 |     critic.model.fsdp_config.grad_offload=False \
41 |     critic.model.fsdp_config.optimizer_offload=False \
42 |     algorithm.kl_ctrl.kl_coef=0.001 \
43 |     trainer.critic_warmup=0 \
44 |     trainer.logger=['console','wandb'] \
45 |     trainer.project_name='verl_example_gsm8k' \
46 |     trainer.experiment_name='qwen2-7b_function_rm_bsz8k_p4k_r4k_seq_packing' \
47 |     trainer.n_gpus_per_node=8 \
48 |     +trainer.val_before_train=False \
49 |     trainer.nnodes=1 \
50 |     trainer.save_freq=-1 \
51 |     trainer.test_freq=5 \
52 |     trainer.total_epochs=15 $@
53 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2.5-32b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo \
12 |     data.train_files="$train_files" \
13 |     data.val_files="$test_files" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=6304 \
16 |     data.max_prompt_length=1024 \
17 |     data.max_response_length=1024 \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-32B-Instruct \
19 |     actor_rollout_ref.model.enable_gradient_checkpointing=False \
20 |     actor_rollout_ref.actor.optim.lr=1e-6 \
21 |     actor_rollout_ref.model.use_remove_padding=True \
22 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
23 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
24 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
25 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
26 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
27 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \
31 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     critic.optim.lr=1e-5 \
34 |     critic.model.use_remove_padding=True \
35 |     critic.model.path=Qwen/Qwen2.5-32B-Instruct \
36 |     critic.model.enable_gradient_checkpointing=False \
37 |     critic.ppo_micro_batch_size=32 \
38 |     critic.model.fsdp_config.param_offload=False \
39 |     critic.model.fsdp_config.grad_offload=False \
40 |     critic.model.fsdp_config.optimizer_offload=False \
41 |     algorithm.kl_ctrl.kl_coef=0.0001 \
42 |     trainer.critic_warmup=0 \
43 |     trainer.logger=['console','wandb'] \
44 |     trainer.project_name='verl_example' \
45 |     trainer.experiment_name='Qwen2.5-32B-Instruct_function_rm' \
46 |     trainer.n_gpus_per_node=8 \
47 |     trainer.nnodes=4 \
48 |     trainer.save_freq=-1 \
49 |     trainer.test_freq=10 \
50 |     trainer.total_epochs=15 $@
51 | 


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_deepseek_6b7.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace to your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_2b.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so $@ refers to the rest
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
17 |      -m verl.trainer.fsdp_sft_trainer \
18 |     data.train_files=$HOME/data/gsm8k/train.parquet \
19 |     data.val_files=$HOME/data/gsm8k/test.parquet \
20 |     data.prompt_key=extra_info \
21 |     data.response_key=extra_info \
22 |     +data.prompt_dict_keys=['question'] \
23 |     +data.response_dict_keys=['answer'] \
24 |     data.micro_batch_size=8 \
25 |     model.partial_pretrain=google/gemma-2b-it \
26 |     trainer.default_local_dir=$save_path \
27 |     trainer.project_name=gsm8k-sft \
28 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
29 |     trainer.total_epochs=2 \
30 |     trainer.logger=['console','wandb'] \
31 |     trainer.default_hdfs_dir=null $@


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace to your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=google/gemma-1.1-7b-it \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/examples/split_placement/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 main_ppo_split.py \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
13 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
14 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
15 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
16 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
17 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \
18 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
19 |     actor_rollout_ref.rollout.name=vllm \
20 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
21 |     actor_rollout_ref.ref.log_prob_micro_batch_size=32 \
22 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
23 |     critic.optim.lr=1e-5 \
24 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
25 |     critic.model.enable_gradient_checkpointing=False \
26 |     critic.ppo_micro_batch_size=16 \
27 |     critic.model.fsdp_config.param_offload=False \
28 |     critic.model.fsdp_config.grad_offload=False \
29 |     critic.model.fsdp_config.optimizer_offload=False \
30 |     algorithm.kl_ctrl.kl_coef=0.001 \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['console','wandb'] \
33 |     trainer.project_name='verl_example_gsm8k' \
34 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
35 |     trainer.n_gpus_per_node=8 \
36 |     trainer.nnodes=1 \
37 |     trainer.save_freq=-1 \
38 |     trainer.total_epochs=15 $@
39 | 


--------------------------------------------------------------------------------
/main_grpo.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | MODEL_PATH=xxx
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | python3 -m verl.trainer.main_ppo \
 5 |     algorithm.adv_estimator=grpo \
 6 |     data.train_files=data/xxx \
 7 |     data.val_files=dataxxx \
 8 |     data.train_batch_size=64 \
 9 |     data.val_batch_size=32 \
10 |     data.max_prompt_length=400 \
11 |     data.max_response_length=2048 \
12 |     actor_rollout_ref.model.path=$MODEL_PATH\
13 |     actor_rollout_ref.actor.optim.lr=3e-7 \
14 |     actor_rollout_ref.model.use_remove_padding=True \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=32 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size=16 \
17 |     actor_rollout_ref.actor.use_kl_loss=True \
18 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
19 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
20 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
21 |     actor_rollout_ref.actor.fsdp_config.param_offload=True \
22 |     actor_rollout_ref.actor.fsdp_config.grad_offload=True \
23 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
28 |     actor_rollout_ref.rollout.n=16 \
29 |     actor_rollout_ref.ref.log_prob_micro_batch_size=160 \
30 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['wandb'] \
34 |     trainer.project_name='GRPO_logic_KK' \
35 |     trainer.experiment_name='Qwen-7B' \
36 |     trainer.n_gpus_per_node=4 \
37 |     trainer.nnodes=1 \
38 |     trainer.default_local_dir=xxx \
39 |     trainer.default_hdfs_dir=null \
40 |     trainer.save_freq=10 \
41 |     trainer.test_freq=10 \
42 |     trainer.total_epochs=5 $@ 2>&1 | tee grpo.log
43 | 


--------------------------------------------------------------------------------
/math_eval/auto_test_aime.sh:
--------------------------------------------------------------------------------
 1 | CHECKPOINT_PATH="$1"/actor
 2 | mkdir -p /volume/ailab4sci/ztgao/log/"$2"
 3 | mkdir -p /volume/ailab4sci/ztgao/aime/"$2"
 4 | 
 5 | if [ ! -d "$CHECKPOINT_PATH" ]; then
 6 |   echo "提供的路径无效: $CHECKPOINT_PATH"
 7 |   exit 1
 8 | fi
 9 | 
10 | CHECKPOINTS=($(find "$CHECKPOINT_PATH" -mindepth 1 -maxdepth 1 -type d))
11 | 
12 | declare -A GPU_LAST_USED_TIME
13 | declare -a CHECKPOINT_QUEUE
14 | 
15 | check_gpu_free() {
16 |     local gpu_id=$1
17 |     local pid=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader,nounits -i "$gpu_id")
18 |     local current_time=$(date +%s)
19 | 
20 |     if [ -n "$pid" ]; then
21 |         GPU_LAST_USED_TIME["$gpu_id"]=$current_time
22 |         return 1
23 |     fi
24 | 
25 |     if [ -z "${GPU_LAST_USED_TIME["$gpu_id"]}" ] || [ $((current_time - GPU_LAST_USED_TIME["$gpu_id"])) -gt 600 ]; then
26 |         GPU_LAST_USED_TIME["$gpu_id"]=$current_time
27 |         return 0
28 |     else
29 |         return 1
30 |     fi
31 | }
32 | 
33 | for CHECKPOINT in "${CHECKPOINTS[@]}"; do
34 |     step=$(basename "$CHECKPOINT" | sed -E 's/[^0-9]*([0-9]+)$/\1/')
35 |     log_path="/volume/ailab4sci/ztgao/log/$2/$step.log"
36 | 
37 |     if [ ! -f "$log_path" ]; then
38 |         CHECKPOINT_QUEUE+=("$CHECKPOINT")
39 |         echo "已添加 checkpoint $CHECKPOINT 到队列。"
40 |     else
41 |         echo "日志文件已存在: $log_path, 跳过此模型。"
42 |     fi
43 | done
44 | 
45 | while [ ${#CHECKPOINT_QUEUE[@]} -gt 0 ]; do
46 |     CHECKPOINT=${CHECKPOINT_QUEUE[0]}
47 |     step=$(basename "$CHECKPOINT" | sed -E 's/[^0-9]*([0-9]+)$/\1/')
48 |     log_path="/volume/ailab4sci/ztgao/log/$2/$step.log"
49 | 
50 |     GPU_ID=""
51 |     for i in $(seq 0 7); do
52 |         if check_gpu_free "$i"; then
53 |             GPU_ID=$i
54 |             break
55 |         fi
56 |     done
57 | 
58 |     if [ -z "$GPU_ID" ]; then
59 |         echo "没有空闲的 GPU，等待 30 秒后重试..."
60 |         sleep 30
61 |         continue
62 |     fi
63 | 
64 |     json_path="/volume/ailab4sci/ztgao/aime/$2/$step.json"
65 |     mkdir -p "$(dirname "$log_path")"
66 |     echo "使用 GPU $GPU_ID 处理模型: $CHECKPOINT, log: $log_path"
67 |     CUDA_VISIBLE_DEVICES=$GPU_ID python3 test_aime.py --model_path "$CHECKPOINT" --json_path "$json_path" > "$log_path" 2>&1 &
68 |     CHECKPOINT_QUEUE=("${CHECKPOINT_QUEUE[@]:1}")
69 |     sleep 2
70 | done
71 | 
72 | wait
73 | echo "所有评估任务已完成。"
74 | 


--------------------------------------------------------------------------------
/math_eval/test_aime.sh:
--------------------------------------------------------------------------------
 1 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 120 > log/aime/1/120.log 2>&1 &
 2 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 180 > log/aime/1/180.log 2>&1 &
 3 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 300 > log/aime/1/300.log 2>&1 &
 4 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 360 > log/aime/1/360.log 2>&1 &
 5 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 420 > log/aime/1/420.log 2>&1 &
 6 | CUDA_VISIBLE_DEVICES=5 python test_aime.py --stage 1 --step 480 > log/aime/1/480.log 2>&1 &
 7 | CUDA_VISIBLE_DEVICES=6 python test_aime.py --stage 1 --step 600 > log/aime/1/600.log 2>&1 &
 8 | CUDA_VISIBLE_DEVICES=7 python test_aime.py --stage 1 --step 660 > log/aime/1/660.log 2>&1 &
 9 | 
10 | wait
11 | 
12 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 720 > log/aime/1/720.log 2>&1 &
13 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 780 > log/aime/1/780.log 2>&1 &
14 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 900 > log/aime/1/900.log 2>&1 &
15 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 960 > log/aime/1/960.log 2>&1 &
16 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 1020 > log/aime/1/1020.log 2>&1 &
17 | CUDA_VISIBLE_DEVICES=5 python test_aime.py --stage 1 --step 1080 > log/aime/1/1080.log 2>&1 &
18 | CUDA_VISIBLE_DEVICES=6 python test_aime.py --stage 1 --step 1320 > log/aime/1/1320.log 2>&1 &
19 | CUDA_VISIBLE_DEVICES=7 python test_aime.py --stage 1 --step 1380 > log/aime/1/1380.log 2>&1 &
20 | 
21 | wait
22 | 
23 | CUDA_VISIBLE_DEVICES=0 python test_aime.py --stage 1 --step 1440 > log/aime/1/1440.log 2>&1 &
24 | CUDA_VISIBLE_DEVICES=1 python test_aime.py --stage 1 --step 1560 > log/aime/1/1560.log 2>&1 &
25 | CUDA_VISIBLE_DEVICES=2 python test_aime.py --stage 1 --step 1620 > log/aime/1/1620.log 2>&1 &
26 | CUDA_VISIBLE_DEVICES=3 python test_aime.py --stage 1 --step 1680 > log/aime/1/1680.log 2>&1 &
27 | CUDA_VISIBLE_DEVICES=4 python test_aime.py --stage 1 --step 1740 > log/aime/1/1740.log 2>&1 &


--------------------------------------------------------------------------------
/math_eval/test_amc.sh:
--------------------------------------------------------------------------------
 1 | # CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 120 > log/amc/1/120.log 2>&1 &
 2 | # CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 180 > log/amc/1/180.log 2>&1 &
 3 | # CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 300 > log/amc/1/300.log 2>&1 &
 4 | # CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 360 > log/amc/1/360.log 2>&1 &
 5 | # CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 420 > log/amc/1/420.log 2>&1 &
 6 | # CUDA_VISIBLE_DEVICES=5 python test_amc.py --stage 1 --step 480 > log/amc/1/480.log 2>&1 &
 7 | # CUDA_VISIBLE_DEVICES=6 python test_amc.py --stage 1 --step 600 > log/amc/1/600.log 2>&1 &
 8 | # CUDA_VISIBLE_DEVICES=7 python test_amc.py --stage 1 --step 660 > log/amc/1/660.log 2>&1 &
 9 | 
10 | # wait
11 | 
12 | CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 720 > log/amc/1/720.log 2>&1 &
13 | CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 780 > log/amc/1/780.log 2>&1 &
14 | CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 900 > log/amc/1/900.log 2>&1 &
15 | CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 960 > log/amc/1/960.log 2>&1 &
16 | CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 1020 > log/amc/1/1020.log 2>&1 &
17 | CUDA_VISIBLE_DEVICES=5 python test_amc.py --stage 1 --step 1080 > log/amc/1/1080.log 2>&1 &
18 | CUDA_VISIBLE_DEVICES=6 python test_amc.py --stage 1 --step 1320 > log/amc/1/1320.log 2>&1 &
19 | CUDA_VISIBLE_DEVICES=7 python test_amc.py --stage 1 --step 1380 > log/amc/1/1380.log 2>&1 &
20 | 
21 | # wait
22 | 
23 | # CUDA_VISIBLE_DEVICES=0 python test_amc.py --stage 1 --step 1440 > log/amc/1/1440.log 2>&1 &
24 | # CUDA_VISIBLE_DEVICES=1 python test_amc.py --stage 1 --step 1560 > log/amc/1/1560.log 2>&1 &
25 | # CUDA_VISIBLE_DEVICES=2 python test_amc.py --stage 1 --step 1620 > log/amc/1/1620.log 2>&1 &
26 | # CUDA_VISIBLE_DEVICES=3 python test_amc.py --stage 1 --step 1680 > log/amc/1/1680.log 2>&1 &
27 | # CUDA_VISIBLE_DEVICES=4 python test_amc.py --stage 1 --step 1740 > log/amc/1/1740.log 2>&1 &


--------------------------------------------------------------------------------
/pics/response.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response.png


--------------------------------------------------------------------------------
/pics/response_mean_length.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response_mean_length.png


--------------------------------------------------------------------------------
/pics/response_mean_length_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/response_mean_length_v2.png


--------------------------------------------------------------------------------
/pics/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/teaser.png


--------------------------------------------------------------------------------
/pics/test_score_plot_v1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/pics/test_score_plot_v1.jpg


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | # -------------------------------
 2 | # build-system
 3 | # -------------------------------
 4 | [build-system]
 5 | requires = [
 6 |     "setuptools>=61.0",
 7 |     "wheel"
 8 | ]
 9 | build-backend = "setuptools.build_meta"
10 | 
11 | # -------------------------------
12 | # project (PEP 621 metadata)
13 | # -------------------------------
14 | [project]
15 | name = "verl"
16 | # We'll mark the version as "dynamic" because it's read from the file "verl/version/version" 
17 | # (PEP 621 calls this "dynamic version"). 
18 | # The actual version is specified in the [tool.setuptools.dynamic] section below.
19 | dynamic = ["version"]
20 | 
21 | description = "veRL: Volcano Engine Reinforcement Learning for LLM"
22 | license = {file = "LICENSE"}  # or "Apache-2.0", if you prefer an SPDX identifier
23 | readme = {file = "README.md", content-type = "text/markdown"}
24 | requires-python = ">=3.8"
25 | 
26 | authors = [
27 |   { name = "Bytedance - Seed - MLSys", email = "zhangchi.usc1992@bytedance.com" },
28 |   { name = "Bytedance - Seed - MLSys", email = "gmsheng@connect.hku.hk" },
29 | ]
30 | 
31 | # Dependencies corresponding to install_requires in setup.py
32 | dependencies = [
33 |     "accelerate",
34 |     "codetiming",
35 |     "datasets",
36 |     "dill",
37 |     "hydra-core",
38 |     "numpy",
39 |     "pybind11",
40 |     "ray",
41 |     "tensordict",
42 |     "transformers<4.48",
43 |     "vllm<=0.6.3",
44 | ]
45 | 
46 | # Optional dependencies (extras_require in setup.py)
47 | [project.optional-dependencies]
48 | test = [
49 |   "pytest", "yapf"
50 | ]
51 | 
52 | # URLs
53 | [project.urls]
54 | Homepage = "https://github.com/volcengine/verl"
55 | 
56 | # -------------------------------
57 | # tool.setuptools - Additional config
58 | # -------------------------------
59 | [tool.setuptools]
60 | # True means `setuptools` will attempt to include all relevant files in package_data automatically.
61 | # This corresponds to `include_package_data=True` in setup.py.
62 | include-package-data = true
63 | 
64 | # We read the version from a file in 'verl/version/version'
65 | [tool.setuptools.dynamic]
66 | version = {file = "verl/version/version"}
67 | 
68 | # If you need to mimic `package_dir={'': '.'}`:
69 | [tool.setuptools.package-dir]
70 | "" = "."
71 | 
72 | # If you need to include specific non-Python data (like YAML files or version file):
73 | # This is the rough equivalent of package_data={'': ['version/*'], 'verl': ['trainer/config/*.yaml']}
74 | [tool.setuptools.package-data]
75 | verl = [
76 |   "version/*",
77 |   "trainer/config/*.yaml"
78 | ]


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | codetiming
 3 | datasets
 4 | dill
 5 | flash-attn
 6 | hydra-core
 7 | numpy
 8 | pandas
 9 | pybind11
10 | ray
11 | tensordict<0.6
12 | transformers<4.48
13 | vllm==0.6.3
14 | wandb
15 | 


--------------------------------------------------------------------------------
/scripts/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pip3 install --upgrade yapf
3 | yapf -ir -vv --style ./.style.yapf verl tests single_controller examples


--------------------------------------------------------------------------------
/scripts/train_grpo_4gpu_7Binstruct.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=grpo \
 7 |     data.train_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/train.parquet \
 8 |     data.val_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/test.parquet \
 9 |     data.train_batch_size=16 \
10 |     data.val_batch_size=16 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=2048 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct-1M \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size=64 \
18 |     actor_rollout_ref.actor.use_kl_loss=True \
19 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
20 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
29 |     actor_rollout_ref.rollout.n=8 \
30 |     actor_rollout_ref.ref.log_prob_micro_batch_size=160 \
31 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['wandb'] \
35 |     trainer.project_name='verl_grpo_example_gsm8k' \
36 |     trainer.experiment_name='qwen2_7b_function_rm' \
37 |     trainer.n_gpus_per_node=4 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=20 \
41 |     trainer.total_epochs=1 $@
42 | 


--------------------------------------------------------------------------------
/scripts/train_ppo_3B_4gpu.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_ppo \
 2 | data.train_files=$DATA_DIR/train.parquet \
 3 | data.val_files=$DATA_DIR/test.parquet \
 4 | data.train_batch_size=8 \
 5 | data.val_batch_size=8 \
 6 | data.max_prompt_length=300 \
 7 | data.max_response_length=1024 \
 8 | actor_rollout_ref.model.path=$BASE_MODEL \
 9 | actor_rollout_ref.actor.optim.lr=5e-6 \
10 | actor_rollout_ref.actor.ppo_mini_batch_size=32 \
11 | actor_rollout_ref.actor.ppo_micro_batch_size=4 \
12 | actor_rollout_ref.rollout.log_prob_micro_batch_size=40 \
13 | actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \
14 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
15 | actor_rollout_ref.ref.log_prob_micro_batch_size=40 \
16 | actor_rollout_ref.rollout.temperature=0.6 \
17 | actor_rollout_ref.rollout.top_k=-1 \
18 | actor_rollout_ref.rollout.top_p=0.8 \
19 | actor_rollout_ref.rollout.n=4 \
20 | critic.optim.lr=1e-6 \
21 | critic.model.path=$BASE_MODEL \
22 | critic.ppo_micro_batch_size=32 \
23 | algorithm.kl_ctrl.kl_coef=0.001 \
24 | trainer.logger=['wandb'] \
25 | +trainer.val_before_train=False \
26 | trainer.default_hdfs_dir=null \
27 | trainer.n_gpus_per_node=$N_GPUS \
28 | trainer.nnodes=1 \
29 | trainer.save_freq=200 \
30 | trainer.test_freq=20 \
31 | trainer.project_name=KK \
32 | trainer.experiment_name=$EXPERIMENT_NAME \
33 | trainer.total_epochs=1 2>&1 | tee verl_demo.log
34 | 


--------------------------------------------------------------------------------
/scripts/train_ppo_7B_4gpu.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_ppo \
 2 | data.train_files=$DATA_DIR/train.parquet \
 3 | data.val_files=$DATA_DIR/test.parquet \
 4 | data.train_batch_size=4 \
 5 | data.val_batch_size=4 \
 6 | data.max_prompt_length=512 \
 7 | data.max_response_length=2048 \
 8 | actor_rollout_ref.model.path=$BASE_MODEL \
 9 | actor_rollout_ref.actor.optim.lr=1e-6 \
10 | actor_rollout_ref.model.use_remove_padding=True \
11 | actor_rollout_ref.actor.ppo_mini_batch_size=4 \
12 | actor_rollout_ref.actor.ppo_micro_batch_size=4 \
13 | actor_rollout_ref.actor.fsdp_config.param_offload=False \
14 | actor_rollout_ref.actor.fsdp_config.grad_offload=False \
15 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
16 | actor_rollout_ref.rollout.temperature=0.6 \
17 | actor_rollout_ref.rollout.top_k=1 \
18 | actor_rollout_ref.rollout.do_sample=True \
19 | actor_rollout_ref.rollout.n=2 \
20 | actor_rollout_ref.rollout.log_prob_micro_batch_size=4 \
21 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.2 \
23 | actor_rollout_ref.ref.log_prob_micro_batch_size=4 \
24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \
25 | critic.optim.lr=1e-5 \
26 | critic.model.use_remove_padding=True \
27 | critic.model.path=$BASE_MODEL \
28 | critic.ppo_micro_batch_size=4 \
29 | critic.model.fsdp_config.param_offload=False \
30 | critic.model.fsdp_config.grad_offload=False \
31 | critic.model.fsdp_config.optimizer_offload=False \
32 | algorithm.kl_ctrl.kl_coef=0.001 \
33 | trainer.logger=['wandb'] \
34 | +trainer.val_before_train=False \
35 | trainer.default_hdfs_dir=null \
36 | trainer.n_gpus_per_node=$N_GPUS \
37 | trainer.nnodes=1 \
38 | trainer.save_freq=100 \
39 | trainer.test_freq=20 \
40 | trainer.project_name=KK_logic \
41 | trainer.experiment_name=$EXPERIMENT_NAME \
42 | trainer.total_epochs=1 2>&1 | tee verl_demo.log


--------------------------------------------------------------------------------
/scripts/train_reinforce_plus_4gpu_7Binstruct.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=reinforce_plus_plus \
 7 |     data.train_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/train.parquet \
 8 |     data.val_files=/home/t2vg-a100-G4-43/data/kk/instruct/3ppl/test.parquet \
 9 |     data.train_batch_size=16 \
10 |     data.val_batch_size=16 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=2048 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct-1M \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size=64 \
18 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
19 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
23 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=160 \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
27 |     actor_rollout_ref.rollout.n=8 \
28 |     actor_rollout_ref.ref.log_prob_micro_batch_size=160 \
29 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
30 |     algorithm.kl_ctrl.kl_coef=0.001 \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['wandb'] \
33 |     trainer.project_name='GRPO_logic_KK' \
34 |     trainer.experiment_name='RF++-Qwen-7B-1M-3ppl-001' \
35 |     trainer.n_gpus_per_node=4 \
36 |     trainer.nnodes=1 \
37 |     trainer.save_freq=-1 \
38 |     trainer.test_freq=10 \
39 |     trainer.total_epochs=1 $@
40 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # setup.py is the fallback installation script when pyproject.toml does not work
16 | from setuptools import setup, find_packages
17 | import os
18 | 
19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
20 | 
21 | with open(os.path.join(version_folder, 'verl/version/version')) as f:
22 |     __version__ = f.read().strip()
23 | 
24 | 
25 | with open('requirements.txt') as f:
26 |     required = f.read().splitlines()
27 |     install_requires = [item.strip() for item in required if item.strip()[0] != '#']
28 | 
29 | extras_require = {
30 |     'test': ['pytest', 'yapf']
31 | }
32 | 
33 | from pathlib import Path
34 | this_directory = Path(__file__).parent
35 | long_description = (this_directory / "README.md").read_text()
36 | 
37 | setup(
38 |     name='verl',
39 |     version=__version__,
40 |     package_dir={'': '.'},
41 |     packages=find_packages(where='.'),
42 |     url='https://github.com/volcengine/verl',
43 |     license='Apache 2.0',
44 |     author='Bytedance - Seed - MLSys',
45 |     author_email='zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk',
46 |     description='veRL: Volcano Engine Reinforcement Learning for LLM',
47 |     install_requires=install_requires,
48 |     extras_require=extras_require,
49 |     package_data={'': ['version/*'],
50 |                   'verl': ['trainer/config/*.yaml'],},
51 |     include_package_data=True,
52 |     long_description=long_description,
53 |     long_description_content_type='text/markdown'
54 | )


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.


--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/create_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from tests.e2e.envs.digit_completion import DigitCompletion, generate_ground_truth_response
16 | from torch.utils import data
17 | import os
18 | 
19 | if __name__ == '__main__':
20 |     simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9)
21 |     all_prompts = simple_task.get_all_prompts()
22 | 
23 |     # 21 * 6 * 4
24 |     train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2])
25 |     train_data = list(train_data)
26 |     test_data = list(test_data)
27 | 
28 |     train_data = [[{'role': 'user', 'content': str(item)}] \
29 |                      for item in train_data]
30 |     test_data = [[{'role': 'user', 'content': str(item)}] \
31 |                      for item in test_data]
32 | 
33 |     print(f'Size of train: {len(train_data)}, size of test: {len(test_data)}')
34 | 
35 |     train_data = {'prompt': train_data}
36 |     test_data = {'prompt': test_data}
37 | 
38 |     model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)))
39 | 
40 |     import pandas as pd
41 | 
42 |     train_data_frame = pd.DataFrame(train_data)
43 |     test_data_frame = pd.DataFrame(test_data)
44 | 
45 |     train_data_frame.to_parquet(os.path.join(model_folder, 'train.parquet'))
46 |     test_data_frame.to_parquet(os.path.join(model_folder, 'test.parquet'))
47 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/data/test.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/data/train.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "LlamaForCausalLM"
 4 |   ],
 5 |   "attention_bias": false,
 6 |   "attention_dropout": 0.0,
 7 |   "bos_token_id": null,
 8 |   "eos_token_id": 1,
 9 |   "hidden_act": "silu",
10 |   "hidden_size": 128,
11 |   "initializer_range": 0.02,
12 |   "intermediate_size": 344,
13 |   "max_position_embeddings": 2048,
14 |   "mlp_bias": false,
15 |   "model_type": "llama",
16 |   "num_attention_heads": 4,
17 |   "num_hidden_layers": 4,
18 |   "num_key_value_heads": 4,
19 |   "pad_token_id": 2,
20 |   "pretraining_tp": 1,
21 |   "rms_norm_eps": 1e-06,
22 |   "rope_scaling": null,
23 |   "rope_theta": 10000.0,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.43.3",
27 |   "use_cache": true,
28 |   "vocab_size": 16
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/generation_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "_from_model_config": true,
3 |   "eos_token_id": 1,
4 |   "pad_token_id": 2,
5 |   "transformers_version": "4.43.3"
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unakar/Logic-RL/9d2c457525ec14639e85afa12d49bb16efb053a4/tests/e2e/arithmetic_sequence/model/model.safetensors


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "char_ords": [
 3 |         48,
 4 |         49,
 5 |         50,
 6 |         51,
 7 |         52,
 8 |         53,
 9 |         54,
10 |         55,
11 |         56,
12 |         57,
13 |         44,
14 |         58
15 |     ],
16 |     "model_max_length": 2048,
17 |     "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}"
18 | }


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
 1 | # Digit completion
 2 | 
 3 | This is an example of solving a digit completion problem. The problem is defined as below:
 4 | 
 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers.
 6 | If the max number is reached, the next number should be modulo with max number.
 7 | 
 8 | For example,
 9 | - prompt = [1, 2, 3]
10 | - N = 5
11 | - max_number = 6
12 | 
13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1].
14 | 
15 | # Environment definition
16 | 
17 | The core definition of the task is defined in verl/envs/digit_completion/task.py
18 | 
19 | It is highly recommended to take a look at it for better understanding.
20 | 
21 | 
22 | 
23 | # Run experiments
24 | 
25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory)
26 | 
27 | ```bash
28 | # cd examples/arithmetic_sequence/rl
29 | 
30 | # Specify the config path and config name (current working dir)
31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron'
32 | 
33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using:
34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config
35 | 
36 | ```
37 | 
38 | 


--------------------------------------------------------------------------------
/tests/e2e/check_results.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | 
17 | import numpy as np
18 | 
19 | 
20 | def extract_reward_from_line(line):
21 |     # TODO: this function needs error handling
22 |     try:
23 |         key_vals = line.split(' - ')
24 |         for key_val in key_vals:
25 |             key, val = key_val.split(':')
26 |             if key == 'critic/rewards/mean':
27 |                 reward = float(val)
28 |                 return reward
29 |         return -np.inf
30 |     except Exception:
31 |         return -np.inf
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     parser = argparse.ArgumentParser()
36 |     parser.add_argument('--output_file', required=True, type=str)
37 | 
38 |     args = parser.parse_args()
39 | 
40 |     with open(args.output_file, 'r') as f:
41 |         output = f.read().split('\n')
42 | 
43 |     best_reward = -np.inf
44 |     for line in output:
45 |         if line.startswith('step'):
46 |             reward = extract_reward_from_line(line)
47 |             if reward > best_reward:
48 |                 best_reward = reward
49 | 
50 |     print(f'Best reward is {best_reward}')
51 |     assert best_reward > 0.2, f'Best reward must be greater than 0.2. best_reward: {best_reward}'
52 |     print('Check passes')
53 | 


--------------------------------------------------------------------------------
/tests/e2e/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .digit_completion import DigitCompletion
16 | 
17 | __all__ = ['DigitCompletion']


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .task import DigitCompletion, generate_ground_truth_response
16 | from .tokenizer import CharTokenizer
17 | 
18 | from transformers import AutoTokenizer, LlamaConfig
19 | 
20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True)
21 | 
22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer']


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=True \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
19 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
20 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
21 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
22 |     actor_rollout_ref.rollout.name=vllm \
23 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
24 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
25 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
26 |     critic.optim.lr=1e-5 \
27 |     critic.model.use_remove_padding=True \
28 |     critic.model.path=Qwen/Qwen2.5-0.5B \
29 |     critic.model.enable_gradient_checkpointing=False \
30 |     critic.ppo_micro_batch_size=32 \
31 |     critic.model.fsdp_config.param_offload=False \
32 |     critic.model.fsdp_config.grad_offload=False \
33 |     critic.model.fsdp_config.optimizer_offload=False \
34 |     algorithm.kl_ctrl.kl_coef=0.001 \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console'] \
37 |     trainer.project_name='verl_example_gsm8k' \
38 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.total_training_steps=1 $@
43 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=False \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
19 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
20 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
21 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
22 |     actor_rollout_ref.rollout.name=vllm \
23 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
24 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
25 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
26 |     critic.optim.lr=1e-5 \
27 |     critic.model.use_remove_padding=False \
28 |     critic.model.path=Qwen/Qwen2.5-0.5B \
29 |     critic.model.enable_gradient_checkpointing=False \
30 |     critic.ppo_micro_batch_size=32 \
31 |     critic.model.fsdp_config.param_offload=False \
32 |     critic.model.fsdp_config.grad_offload=False \
33 |     critic.model.fsdp_config.optimizer_offload=False \
34 |     algorithm.kl_ctrl.kl_coef=0.001 \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console'] \
37 |     +trainer.val_before_train=False \
38 |     trainer.project_name='verl_example_gsm8k' \
39 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
40 |     trainer.n_gpus_per_node=8 \
41 |     trainer.nnodes=1 \
42 |     trainer.save_freq=-1 \
43 |     trainer.total_training_steps=1 $@
44 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     data.return_raw_chat=True \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
17 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
18 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
19 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
20 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     critic.optim.lr=1e-5 \
29 |     critic.model.use_remove_padding=True \
30 |     critic.optim.lr_warmup_steps_ratio=0.05 \
31 |     critic.model.path=Qwen/Qwen2.5-0.5B \
32 |     critic.model.enable_gradient_checkpointing=False \
33 |     critic.ppo_micro_batch_size=32 \
34 |     critic.model.fsdp_config.param_offload=False \
35 |     critic.model.fsdp_config.grad_offload=False \
36 |     critic.model.fsdp_config.optimizer_offload=False \
37 |     reward_model.enable=True \
38 |     reward_model.model.path=Qwen/Qwen2.5-0.5B\
39 |     reward_model.model.use_remove_padding=True \
40 |     reward_model.model.fsdp_config.param_offload=True \
41 |     reward_model.micro_batch_size=16 \
42 |     algorithm.kl_ctrl.kl_coef=0.001 \
43 |     trainer.critic_warmup=0 \
44 |     trainer.logger=['console'] \
45 |     +trainer.val_before_train=False \
46 |     trainer.project_name='verl_example' \
47 |     trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm' \
48 |     trainer.n_gpus_per_node=8 \
49 |     trainer.nnodes=1 \
50 |     trainer.save_freq=-1 \
51 |     trainer.total_training_steps=1 $@
52 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     data.return_raw_chat=True \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=False \
16 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
17 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
18 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
19 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
20 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     critic.optim.lr=1e-5 \
29 |     critic.model.use_remove_padding=False \
30 |     critic.optim.lr_warmup_steps_ratio=0.05 \
31 |     critic.model.path=Qwen/Qwen2.5-0.5B \
32 |     critic.model.enable_gradient_checkpointing=False \
33 |     critic.ppo_micro_batch_size=32 \
34 |     critic.model.fsdp_config.param_offload=False \
35 |     critic.model.fsdp_config.grad_offload=False \
36 |     critic.model.fsdp_config.optimizer_offload=False \
37 |     reward_model.enable=True \
38 |     reward_model.model.path=Qwen/Qwen2.5-0.5B\
39 |     reward_model.model.use_remove_padding=False \
40 |     reward_model.model.fsdp_config.param_offload=True \
41 |     reward_model.micro_batch_size=16 \
42 |     algorithm.kl_ctrl.kl_coef=0.001 \
43 |     trainer.critic_warmup=0 \
44 |     +trainer.val_before_train=False \
45 |     trainer.logger=['console'] \
46 |     trainer.project_name='verl_example' \
47 |     trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm' \
48 |     trainer.n_gpus_per_node=8 \
49 |     trainer.nnodes=1 \
50 |     trainer.save_freq=-1 \
51 |     trainer.total_training_steps=1 $@
52 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     data.return_raw_chat=True \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
17 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
18 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
19 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
20 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=12000 \
21 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
28 |     actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=12000 \
29 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
30 |     actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=12000 \
31 |     critic.optim.lr=1e-5 \
32 |     critic.model.use_remove_padding=True \
33 |     critic.optim.lr_warmup_steps_ratio=0.05 \
34 |     critic.model.path=Qwen/Qwen2.5-0.5B \
35 |     critic.model.enable_gradient_checkpointing=False \
36 |     critic.ppo_micro_batch_size=32 \
37 |     critic.use_dynamic_bsz=True \
38 |     critic.ppo_max_token_len_per_gpu=98304 \
39 |     critic.model.fsdp_config.param_offload=False \
40 |     critic.model.fsdp_config.grad_offload=False \
41 |     critic.model.fsdp_config.optimizer_offload=False \
42 |     reward_model.enable=True \
43 |     reward_model.model.path=Qwen/Qwen2.5-0.5B\
44 |     reward_model.model.use_remove_padding=True \
45 |     reward_model.model.fsdp_config.param_offload=True \
46 |     reward_model.micro_batch_size=16 \
47 |     reward_model.use_dynamic_bsz=True \
48 |     reward_model.forward_max_token_len_per_gpu=98304 \
49 |     algorithm.kl_ctrl.kl_coef=0.001 \
50 |     trainer.critic_warmup=0 \
51 |     trainer.logger=['console'] \
52 |     +trainer.val_before_train=False \
53 |     trainer.project_name='verl_example' \
54 |     trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm_seq_balance' \
55 |     trainer.n_gpus_per_node=8 \
56 |     trainer.nnodes=1 \
57 |     trainer.save_freq=-1 \
58 |     trainer.total_training_steps=1 $@
59 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2 with flash_attn has some issues
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.val_batch_size=1312 \
10 |     data.max_prompt_length=512 \
11 |     data.max_response_length=512 \
12 |     data.return_raw_chat=True \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
17 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
18 |     actor_rollout_ref.actor.ppo_micro_batch_size=32 \
19 |     actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
23 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
27 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
28 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
29 |     critic.optim.lr=1e-5 \
30 |     critic.ulysses_sequence_parallel_size=2 \
31 |     critic.model.use_remove_padding=True \
32 |     critic.optim.lr_warmup_steps_ratio=0.05 \
33 |     critic.model.path=Qwen/Qwen2.5-0.5B \
34 |     critic.model.enable_gradient_checkpointing=False \
35 |     critic.ppo_micro_batch_size=32 \
36 |     critic.model.fsdp_config.param_offload=False \
37 |     critic.model.fsdp_config.grad_offload=False \
38 |     critic.model.fsdp_config.optimizer_offload=False \
39 |     reward_model.enable=True \
40 |     reward_model.ulysses_sequence_parallel_size=2 \
41 |     reward_model.model.path=Qwen/Qwen2.5-0.5B\
42 |     reward_model.model.use_remove_padding=True \
43 |     reward_model.model.fsdp_config.param_offload=True \
44 |     reward_model.micro_batch_size=16 \
45 |     algorithm.kl_ctrl.kl_coef=0.001 \
46 |     trainer.critic_warmup=0 \
47 |     +trainer.val_before_train=False \
48 |     trainer.logger=['console'] \
49 |     trainer.project_name='verl_example' \
50 |     trainer.experiment_name='Qwen2.5-0.5B-ci_hybrid_rm_sp2' \
51 |     trainer.n_gpus_per_node=8 \
52 |     trainer.nnodes=1 \
53 |     trainer.save_freq=-1 \
54 |     trainer.total_training_steps=1 $@
55 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt"
 6 | 
 7 | export PATH=$PATH:~/.local/bin
 8 | 
 9 | rm -rf $OUTPUT_FILE
10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
11 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
12 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
13 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
14 |     critic.model.path=tests/e2e/arithmetic_sequence/model | tee $OUTPUT_FILE;
15 | 
16 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE
17 | rm -rf $OUTPUT_FILE
18 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer_rmpad.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
 6 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
 7 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
 8 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
 9 |     actor_rollout_ref.rollout.name=vllm \
10 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
11 |     actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \
12 |     critic.model.path=Qwen/Qwen2.5-0.5B \
13 |     critic.model.use_remove_padding=True \
14 |     trainer.total_epochs=1


--------------------------------------------------------------------------------
/tests/gpu_utility/test_ops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_flash_attn_cross_entropy():
17 |     from verl.utils.torch_functional import logprobs_from_logits_naive
18 | 
19 |     from verl.utils.debug import log_gpu_memory_usage
20 | 
21 |     from flash_attn.ops.triton.cross_entropy import cross_entropy_loss
22 | 
23 |     import torch
24 |     from torch import nn
25 | 
26 |     log_gpu_memory_usage('At start')
27 | 
28 |     hidden_states = torch.randn(size=(2048, 5120), device='cuda', requires_grad=True, dtype=torch.bfloat16)
29 | 
30 |     linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device='cuda', dtype=torch.bfloat16)
31 | 
32 |     logits = linear(hidden_states)
33 | 
34 |     # logits = logits.float()
35 |     labels = torch.randint(low=0, high=155136, size=(2048,), device='cuda')
36 | 
37 |     log_gpu_memory_usage('before computation')
38 |     # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True)
39 |     output = -cross_entropy_loss(logits, labels)[0]
40 |     # output = logprobs_from_logits(logits, labels)
41 |     log_gpu_memory_usage('After forward')
42 |     output.sum().backward()
43 |     log_gpu_memory_usage('After backward')
44 | 
45 |     groundtruth = logprobs_from_logits_naive(logits.float(), labels)
46 | 
47 |     torch.testing.assert_close(output, groundtruth)
48 | 


--------------------------------------------------------------------------------
/tests/ray/check_worker_alive/main.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import time
16 | import sys
17 | import os
18 | 
19 | import ray
20 | 
21 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup
22 | from verl.single_controller.base.worker import Worker
23 | from verl.single_controller.base.decorator import register, Dispatch
24 | 
25 | 
26 | @ray.remote
27 | class TestActor(Worker):
28 | 
29 |     def __init__(self) -> None:
30 |         super().__init__()
31 | 
32 |     @register(dispatch_mode=Dispatch.ONE_TO_ALL, blocking=False)
33 |     def foo(self, wait_time):
34 |         time.sleep(wait_time)
35 |         sys.exit(1)
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     wait_time = int(os.getenv("WAIT_TIME", "10"))
40 | 
41 |     ray.init()
42 | 
43 |     # test single-node-no-partition
44 |     print(f"test single-node-no-partition")
45 |     resource_pool = RayResourcePool([2], use_gpu=True)
46 |     class_with_args = RayClassWithInitArgs(cls=TestActor)
47 | 
48 |     print("create worker group")
49 |     wg = RayWorkerGroup(resource_pool, class_with_args, name_prefix="test")
50 | 
51 |     wg.start_worker_aliveness_check(1)
52 |     time.sleep(1)
53 | 
54 |     print(time.time(), "start foo")
55 | 
56 |     _ = wg.foo(wait_time)
57 |     print("foo started")
58 | 
59 |     print(time.time(),
60 |           f"wait 6x wait time {wait_time*6} to let signal returned to process but still not exceed process wait time")
61 |     time.sleep(wait_time * 6)
62 | 
63 |     ray.shutdown()
64 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
 1 | # Detached Worker
 2 | ## How to run (Only on a single node)
 3 | - Start a local ray cluster: 
 4 | ```bash
 5 | ray start --head --port=6379
 6 | ```
 7 | - Run the server
 8 | ```bash
 9 | python3 server.py
10 | ```
11 | - On another terminal, Run the client
12 | ```bash
13 | python3 client.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | In client, we can get the server handler and send RPC request
16 | """
17 | 
18 | import ray
19 | import torch
20 | 
21 | from verl import DataProto
22 | from verl.single_controller.ray import RayClassWithInitArgs
23 | from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup
24 | 
25 | from tensordict import TensorDict
26 | 
27 | from server import Trainer
28 | 
29 | 
30 | def compute_position_id_with_mask(mask):
31 |     return torch.clip(torch.cumsum(mask, dim=-1) - 1, min=0, max=None)
32 | 
33 | 
34 | if __name__ == '__main__':
35 | 
36 |     ray.init(address='auto', namespace='verl')
37 |     # get the worker group using names
38 |     worker_names = ['trainerTrainer_0:0', 'trainerTrainer_0:1']
39 |     cls_with_init_args = RayClassWithInitArgs(cls=Trainer)
40 |     worker_group = NVMegatronRayWorkerGroup.from_detached(worker_names=worker_names,
41 |                                                           ray_cls_with_init=cls_with_init_args)
42 | 
43 |     batch_size = 16
44 |     sequence_length = 1024
45 | 
46 |     # give Trainer some data to train
47 |     input_ids = torch.randint(low=0, high=256, size=(batch_size, sequence_length), dtype=torch.int64, device='cuda')
48 |     attention_mask = torch.ones_like(input_ids)
49 |     position_ids = compute_position_id_with_mask(attention_mask)
50 | 
51 |     data = DataProto(batch=TensorDict(
52 |         {
53 |             'input_ids': input_ids,
54 |             'attention_mask': attention_mask,
55 |             'position_ids': position_ids
56 |         }, batch_size=batch_size),
57 |                      meta_info={})
58 | 
59 |     output = worker_group.train_model(data)
60 | 
61 |     print(output)
62 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ray start --head --port=6379
3 | python3 server.py
4 | python3 client.py
5 | ray stop --force


--------------------------------------------------------------------------------
/tests/ray/test_check_worker_alive.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import time
16 | import os
17 | import subprocess
18 | 
19 | 
20 | def test():
21 |     wait_time = 10
22 | 
23 |     my_env = os.environ.copy()
24 |     my_env["WAIT_TIME"] = str(wait_time)
25 | 
26 |     p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)
27 | 
28 |     count = 0
29 |     while b"foo started" not in p.stdout.read():
30 |         time.sleep(1)
31 |         count += 1
32 |         if count > 40:
33 |             raise RuntimeError("timeout for start foo in check_worker_alive/main.py")
34 | 
35 |     print(
36 |         time.time(),
37 |         f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time")
38 |     time.sleep(wait_time * 1.5)
39 |     print(time.time(), f"start checking")
40 |     assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
41 |     assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
42 |     print(f"test passed")
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     test()
47 | 


--------------------------------------------------------------------------------
/tests/ray/test_driverfunc_to_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import ray
17 | import torch
18 | from verl import DataProto
19 | from tensordict import TensorDict
20 | 
21 | from verl.single_controller.base.worker import Worker
22 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs
23 | from verl.single_controller.ray import RayWorkerGroup
24 | 
25 | os.environ['RAY_DEDUP_LOGS'] = '0'
26 | os.environ['NCCL_DEBUG'] = 'WARN'
27 | 
28 | 
29 | @ray.remote
30 | class ModelActor(Worker):
31 | 
32 |     def __init__(self):
33 |         pass
34 | 
35 | 
36 | class HackSelf():
37 | 
38 |     def __init__(self):
39 |         pass
40 | 
41 | 
42 | def get_aux_metrics(self, test_proto):
43 |     sequence_ids = test_proto.batch["sequence_ids"]
44 |     decode_count = []
45 |     for i in range(sequence_ids.size(0)):
46 |         decode_count.append(len(sequence_ids[i].tolist()))
47 |     ret_proto = DataProto(batch=TensorDict({
48 |         "sequence_ids": sequence_ids,
49 |         "decode_count": torch.tensor(decode_count)
50 |     },
51 |                                            batch_size=sequence_ids.size(0)))
52 |     return ret_proto
53 | 
54 | 
55 | def test():
56 |     # construct model
57 |     ray.init()
58 | 
59 |     # create 2 workers, each hold a GPU
60 |     resource_pool = RayResourcePool([2], use_gpu=True, name_prefix='a')
61 | 
62 |     class_with_args = RayClassWithInitArgs(cls=ModelActor)
63 |     shard_wg = RayWorkerGroup(resource_pool, class_with_args)
64 | 
65 |     test_bs = 8
66 |     test_proto = DataProto(TensorDict({
67 |         "sequence_ids": torch.ones([test_bs, 2048], dtype=torch.int64),
68 |     },
69 |                                       batch_size=test_bs),
70 |                            meta_info={"query_length": 1536})
71 | 
72 |     # Sharding among different ranks
73 |     ret_proto1 = shard_wg.execute_with_func_generator(get_aux_metrics, test_proto)
74 | 
75 |     # compare execute on driver
76 |     hs = HackSelf()
77 |     ret_proto2 = get_aux_metrics(hs, test_proto)
78 | 
79 |     torch.testing.assert_close(ret_proto1.batch["decode_count"], ret_proto2.batch["decode_count"])
80 | 
81 |     ray.shutdown()
82 | 


--------------------------------------------------------------------------------
/tests/ray/test_ray_local_envs.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | e2e test verl.single_controller.ray
16 | """
17 | import os
18 | import ray
19 | 
20 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup
21 | from verl.single_controller.base.worker import Worker
22 | from verl.single_controller.base.decorator import register, Dispatch, collect_all_to_all, Execute
23 | 
24 | 
25 | @ray.remote
26 | class TestActor(Worker):
27 | 
28 |     def __init__(self) -> None:
29 |         super().__init__()
30 | 
31 |     def getenv(self, key):
32 |         val = os.getenv(key, f"{key} not set")
33 |         return val
34 | 
35 | 
36 | def test_basics():
37 |     ray.init()
38 | 
39 |     # create 4 workers, each hold a GPU
40 |     resource_pool = RayResourcePool([4], use_gpu=True)
41 |     class_with_args = RayClassWithInitArgs(cls=TestActor)
42 | 
43 |     worker_group = RayWorkerGroup(resource_pool=resource_pool,
44 |                                   ray_cls_with_init=class_with_args,
45 |                                   name_prefix="worker_group_basic")
46 | 
47 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE")
48 |     assert output == ["4", "4", "4", "4"]
49 | 
50 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK")
51 |     assert set(output) == set(["0", "1", "2", "3"])
52 | 
53 |     ray.shutdown()
54 | 
55 | 
56 | if __name__ == '__main__':
57 |     test_basics()
58 | 


--------------------------------------------------------------------------------
/tests/ray/test_rvdz.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
18 | @ray.remote
19 | class TestWorker:
20 | 
21 |     def __init__(self, rank, world_size, group_name):
22 |         self.rank = rank
23 |         self.world_size = world_size
24 |         self.group_name = group_name
25 |         self.communicator = None
26 | 
27 |     def init(self):
28 |         from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray
29 |         self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name)
30 | 
31 |     def test(self):
32 |         if self.communicator is None:
33 |             return None
34 |         return self.communicator.rank_id()
35 | 
36 | 
37 | def test_rvdz():
38 |     ray.init()
39 | 
40 |     group_name = "test_group"
41 |     world_size = 2
42 | 
43 |     workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)]
44 | 
45 |     ray.get([worker.init.remote() for worker in workers])
46 | 
47 |     ranks = ray.get([worker.test.remote() for worker in workers])
48 | 
49 |     assert ranks == [0, 1], f"expecting [0, 1], got {ranks}"
50 | 
51 |     ray.shutdown()
52 | 


--------------------------------------------------------------------------------
/tests/sanity/check_license.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | license_head = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
16 | 
17 | from pathlib import Path
18 | from argparse import ArgumentParser
19 | 
20 | if __name__ == '__main__':
21 |     parser = ArgumentParser()
22 |     parser.add_argument('--directory', '-d', required=True, type=str)
23 |     args = parser.parse_args()
24 |     directory_in_str = args.directory
25 | 
26 |     pathlist = Path(directory_in_str).glob('**/*.py')
27 |     for path in pathlist:
28 |         # because path is object not string
29 |         path_in_str = str(path.absolute())
30 |         with open(path_in_str, 'r') as f:
31 |             file_content = f.read()
32 | 
33 |             assert license_head in file_content, f'file {path_in_str} does not contain license'
34 | 
35 |         print(path_in_str)
36 | 


--------------------------------------------------------------------------------
/tests/sanity/test_import.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_import():
17 |     import verl
18 |     print(verl.__version__)
19 | 
20 | 
21 | def test_single_controller_import():
22 |     import verl.single_controller
23 |     print(verl.single_controller.__version__)
24 | 


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rl_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | import torch
16 | from torch.utils.data import DataLoader
17 | from transformers import AutoTokenizer
18 | 
19 | 
20 | def get_gsm8k_data():
21 |     # prepare test dataset
22 |     url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/gsm8k/train.parquet"
23 |     local_folder = os.path.expanduser('~/verl-data/gsm8k/')
24 |     local_path = os.path.join(local_folder, 'train.parquet')
25 |     os.makedirs(local_folder, exist_ok=True)
26 |     return local_path
27 | 
28 | 
29 | def test_rl_dataset():
30 |     from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn
31 |     from verl.utils import hf_tokenizer
32 |     tokenizer = hf_tokenizer('deepseek-ai/deepseek-coder-1.3b-instruct')
33 |     local_path = get_gsm8k_data()
34 |     dataset = RLHFDataset(parquet_files=local_path, tokenizer=tokenizer, prompt_key='prompt', max_prompt_length=256)
35 | 
36 |     dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, drop_last=True, collate_fn=collate_fn)
37 | 
38 |     a = next(iter(dataloader))
39 | 
40 |     from verl import DataProto
41 | 
42 |     tensors = {}
43 |     non_tensors = {}
44 | 
45 |     for key, val in a.items():
46 |         if isinstance(val, torch.Tensor):
47 |             tensors[key] = val
48 |         else:
49 |             non_tensors[key] = val
50 | 
51 |     data_proto = DataProto.from_dict(tensors=tensors, non_tensors=non_tensors)
52 | 
53 |     data = dataset[0]['input_ids']
54 |     output = tokenizer.batch_decode([data])[0]
55 |     print(f'type: type{output}')
56 |     print(f'\n\noutput: {output}')
57 | 


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rm_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from transformers import AutoTokenizer
17 | from verl.utils import hf_tokenizer
18 | from verl.utils.dataset.rm_dataset import RMDataset
19 | 
20 | 
21 | def get_rm_data():
22 |     # prepare test dataset
23 |     url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet"
24 |     local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/')
25 |     local_path = os.path.join(local_folder, 'test.parquet')
26 |     os.makedirs(local_folder, exist_ok=True)
27 |     return local_path
28 | 
29 | 
30 | def test_rm_dataset():
31 |     tokenizer = hf_tokenizer("facebook/opt-1.3b")
32 |     local_path = get_rm_data()
33 |     dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
34 |     data = dataset[0]['input_ids']
35 |     output = tokenizer.batch_decode(data)
36 |     assert len(output) > 1
37 |     assert type(output[0]) == str
38 | 


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_sft_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from transformers import AutoTokenizer
17 | from verl.utils import hf_tokenizer
18 | from verl.utils.dataset.sft_dataset import SFTDataset
19 | 
20 | 
21 | def get_gsm8k_data():
22 |     # prepare test dataset
23 |     url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/gsm8k/train.parquet"
24 |     local_folder = os.path.expanduser('~/verl-data/gsm8k/')
25 |     local_path = os.path.join(local_folder, 'train.parquet')
26 |     return local_path
27 | 
28 | 
29 | def test_sft_cot_dataset():
30 |     tokenizer = hf_tokenizer('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct')
31 |     local_path = get_gsm8k_data()
32 |     dataset = SFTDataset(parquet_files=local_path,
33 |                          tokenizer=tokenizer,
34 |                          prompt_key='prompt',
35 |                          prompt_dict_keys=['content'],
36 |                          response_key='extra_info',
37 |                          response_dict_keys=['answer'],
38 |                          max_length=512)
39 | 
40 |     data = dataset[0]['input_ids']
41 |     output = tokenizer.batch_decode([data])[0]
42 |     assert len(output) > 1
43 |     assert type(output) == str
44 | 
45 | 
46 | def test_sft_dataset():
47 |     tokenizer = hf_tokenizer('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct')
48 |     local_path = get_gsm8k_data()
49 |     dataset = SFTDataset(parquet_files=local_path,
50 |                          tokenizer=tokenizer,
51 |                          prompt_key='extra_info',
52 |                          prompt_dict_keys=['question'],
53 |                          response_key='extra_info',
54 |                          response_dict_keys=['answer'],
55 |                          max_length=512)
56 | 
57 |     data = dataset[0]['input_ids']
58 |     output = tokenizer.batch_decode([data])[0]
59 |     assert len(output) > 1
60 |     assert type(output) == str


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
18 | 
19 | with open(os.path.join(version_folder, 'version/version')) as f:
20 |     __version__ = f.read().strip()
21 | 
22 | from .protocol import DataProto
23 | 
24 | from .utils.logging_utils import set_basic_config
25 | import logging
26 | 
27 | set_basic_config(level=logging.WARNING)
28 | 


--------------------------------------------------------------------------------
/verl/models/README.md:
--------------------------------------------------------------------------------
 1 | # Models
 2 | Common modelzoo such as huggingface/transformers stuggles when using Pytorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized with packed inputs in verl. 
 3 | ## Adding a New Huggingface Model
 4 | ### Step 1: Copy the model file from HF to verl
 5 | - Add a new file under verl/models/hf
 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf
 7 | 
 8 | ### Step 2: Modify the model file to use packed inputs
 9 | - Remove all the code related to inference (kv cache)
10 | - Modify the inputs to include only
11 |     - input_ids (total_nnz,)
12 |     - cu_seqlens (total_nnz + 1,)
13 |     - max_seqlen_in_batch: int
14 | - Note that this requires using flash attention with causal mask.
15 | 
16 | ### Step 2.5: Add tests
17 | - Add a test to compare this version and the huggingface version
18 | - Following the infrastructure and add tests to tests/models/hf
19 | 
20 | ### Step 3: Add a function to apply tensor parallelism
21 | - Please follow
22 |     - https://pytorch.org/docs/stable/distributed.tensor.parallel.html
23 |     - https://pytorch.org/tutorials/intermediate/TP_tutorial.html
24 | - General comments
25 |     - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward.
26 | 
27 | ### Step 4: Add a function to apply data parallelism
28 | - Please use FSDP2 APIs
29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413
30 | 
31 | ### Step 5: Add a function to apply pipeline parallelism
32 | - Comes in Pytorch 2.4
33 | - Currently only in alpha in nightly version
34 | - Check torchtitan for more details
35 | 
36 | 


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_llama_megatron import (
16 |     # original model with megatron
17 |     ParallelLlamaModel,
18 |     ParallelLlamaForCausalLM,
19 |     # rmpad with megatron
20 |     ParallelLlamaForCausalLMRmPad,
21 |     ParallelLlamaForValueRmPad,
22 |     # rmpad with megatron and pipeline parallelism
23 |     ParallelLlamaForCausalLMRmPadPP,
24 |     ParallelLlamaForValueRmPadPP)
25 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelLlamaAttention
16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad
17 | from .parallel_mlp import ParallelLlamaMLP
18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | import torch
17 | from megatron.core import ModelParallelConfig
18 | from torch import nn
19 | from transformers import LlamaConfig
20 | 
21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
22 | from verl.utils.megatron import sequence_parallel as sp_utils
23 | 
24 | 
25 | class ParallelLlamaRMSNorm(nn.Module):
26 | 
27 |     def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig):
28 |         """
29 |         LlamaRMSNorm is equivalent to T5LayerNorm
30 |         """
31 |         super().__init__()
32 |         if isinstance(config.hidden_size, numbers.Integral):
33 |             normalized_shape = (config.hidden_size,)
34 |         self.normalized_shape = torch.Size(normalized_shape)
35 |         self.weight = nn.Parameter(torch.ones(self.normalized_shape))
36 |         self.variance_epsilon = config.rms_norm_eps
37 | 
38 |         if megatron_config.sequence_parallel:
39 |             sp_utils.mark_parameter_as_sequence_parallel(self.weight)
40 | 
41 |     def forward(self, hidden_states):
42 |         return fused_rms_norm_affine(input=hidden_states,
43 |                                      weight=self.weight,
44 |                                      normalized_shape=self.normalized_shape,
45 |                                      eps=self.variance_epsilon,
46 |                                      memory_efficient=True)


--------------------------------------------------------------------------------
/verl/models/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import importlib
16 | from typing import List, Optional, Type
17 | 
18 | import torch.nn as nn
19 | 
20 | # Supported models using HF Rmpad
21 | # TODO(sgm): HF may supported more than listed here, we should add more after testing
22 | from transformers import LlamaConfig, MistralConfig, GemmaConfig, Qwen2Config
23 | 
24 | _REOVEPAD_MODELS = {'llama': LlamaConfig, 'mistral': MistralConfig, 'gemma': GemmaConfig, 'qwen2': Qwen2Config}
25 | 
26 | 
27 | def check_model_support_rmpad(model_type: str):
28 |     assert isinstance(model_type, str)
29 |     if not model_type in _REOVEPAD_MODELS.keys():
30 |         raise ValueError(f"Model architecture {model_type} is not supported for now. "
31 |                          f"RMPad supported architectures: {_REOVEPAD_MODELS.keys()}."
32 |                          f"Please set `use_remove_padding=False` in the model config.")
33 | 
34 | 
35 | # Supported models in Megatron-LM
36 | # Architecture -> (module, class).
37 | _MODELS = {
38 |     "LlamaForCausalLM":
39 |         ("llama", ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad")),
40 |     "MistralForCausalLM": ("mistral", ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP",
41 |                                        "ParallelMistralForCausalLMRmPad"))
42 | }
43 | 
44 | 
45 | # return model class
46 | class ModelRegistry:
47 | 
48 |     @staticmethod
49 |     def load_model_cls(model_arch: str, value=False) -> Optional[Type[nn.Module]]:
50 |         if model_arch not in _MODELS:
51 |             return None
52 | 
53 |         megatron = "megatron"
54 | 
55 |         module_name, model_cls_name = _MODELS[model_arch]
56 |         if not value:  # actor/ref
57 |             model_cls_name = model_cls_name[0]
58 |         elif value:  # critic/rm
59 |             model_cls_name = model_cls_name[1]
60 | 
61 |         module = importlib.import_module(f"verl.models.{module_name}.{megatron}.modeling_{module_name}_megatron")
62 |         return getattr(module, model_cls_name, None)
63 | 
64 |     @staticmethod
65 |     def get_supported_archs() -> List[str]:
66 |         return list(_MODELS.keys())
67 | 


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def get_weight_loader(arch: str):
17 |     from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama
18 |     _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {'LlamaForCausalLM': load_state_dict_to_megatron_llama}
19 | 
20 |     if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY:
21 |         return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch]
22 |     raise ValueError(f"Model architectures {arch} are not supported for now. "
23 |                      f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}")
24 | 


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
18 | 
19 | with open(os.path.join(version_folder, 'version/version')) as f:
20 |     __version__ = f.read().strip()
21 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool
17 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass
17 | from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo
18 | 
19 | 
20 | class MegatronWorker(Worker):
21 | 
22 |     def __init__(self, cuda_visible_devices=None) -> None:
23 |         super().__init__(cuda_visible_devices)
24 | 
25 |     def get_megatron_global_info(self):
26 |         from megatron.core import parallel_state as mpu
27 |         tp_size = mpu.get_tensor_model_parallel_world_size()
28 |         dp_size = mpu.get_data_parallel_world_size()
29 |         pp_size = mpu.get_pipeline_model_parallel_world_size()
30 |         info = DistGlobalInfo(tp_size=tp_size, dp_size=dp_size, pp_size=pp_size)
31 |         return info
32 | 
33 |     def get_megatron_rank_info(self):
34 |         from megatron.core import parallel_state as mpu
35 |         tp_rank = mpu.get_tensor_model_parallel_rank()
36 |         dp_rank = mpu.get_data_parallel_rank()
37 |         pp_rank = mpu.get_pipeline_model_parallel_rank()
38 |         info = DistRankInfo(tp_rank=tp_rank, dp_rank=dp_rank, pp_rank=pp_rank)
39 |         return info


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker_group.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from .worker import DistRankInfo, DistGlobalInfo
18 | from verl.single_controller.base import ResourcePool, WorkerGroup
19 | 
20 | 
21 | class MegatronWorkerGroup(WorkerGroup):
22 | 
23 |     def __init__(self, resource_pool: ResourcePool, **kwargs):
24 |         super().__init__(resource_pool=resource_pool, **kwargs)
25 |         self._megatron_rank_info = None
26 |         self._megatron_global_info: DistGlobalInfo = None
27 | 
28 |     def init_megatron(self, default_megatron_kwargs: Dict = None):
29 |         raise NotImplementedError(f"MegatronWorkerGroup.init_megatron should be overwritten")
30 | 
31 |     def get_megatron_rank_info(self, rank: int) -> DistRankInfo:
32 |         assert 0 <= rank < self.world_size, f'rank must be from [0, world_size), Got {rank}'
33 |         return self._megatron_rank_info[rank]
34 | 
35 |     @property
36 |     def tp_size(self):
37 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
38 |         return self._megatron_global_info.tp_size
39 | 
40 |     @property
41 |     def dp_size(self):
42 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
43 |         return self._megatron_global_info.dp_size
44 | 
45 |     @property
46 |     def pp_size(self):
47 |         assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized"
48 |         return self._megatron_global_info.pp_size
49 | 
50 |     def get_megatron_global_info(self):
51 |         return self._megatron_global_info
52 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
18 | @ray.remote
19 | class WorkerGroupRegisterCenter:
20 | 
21 |     def __init__(self, rank_zero_info):
22 |         self.rank_zero_info = rank_zero_info
23 | 
24 |     def get_rank_zero_info(self):
25 |         return self.rank_zero_info
26 | 
27 | 
28 | def create_worker_group_register_center(name, info):
29 |     return WorkerGroupRegisterCenter.options(name=name).remote(info)
30 | 


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls
16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo)


--------------------------------------------------------------------------------
/verl/single_controller/version/version:
--------------------------------------------------------------------------------
1 | 0.0.2


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from importlib.metadata import version, PackageNotFoundError
16 | 
17 | 
18 | def get_version(pkg):
19 |     try:
20 |         return version(pkg)
21 |     except PackageNotFoundError:
22 |         return None
23 | 
24 | 
25 | package_name = 'vllm'
26 | package_version = get_version(package_name)
27 | 
28 | if package_version == '0.3.1':
29 |     vllm_version = '0.3.1'
30 |     from .vllm_v_0_3_1.llm import LLM
31 |     from .vllm_v_0_3_1.llm import LLMEngine
32 |     from .vllm_v_0_3_1 import parallel_state
33 | elif package_version == '0.4.2':
34 |     vllm_version = '0.4.2'
35 |     from .vllm_v_0_4_2.llm import LLM
36 |     from .vllm_v_0_4_2.llm import LLMEngine
37 |     from .vllm_v_0_4_2 import parallel_state
38 | elif package_version == '0.5.4':
39 |     vllm_version = '0.5.4'
40 |     from .vllm_v_0_5_4.llm import LLM
41 |     from .vllm_v_0_5_4.llm import LLMEngine
42 |     from .vllm_v_0_5_4 import parallel_state
43 | elif package_version == '0.6.3':
44 |     vllm_version = '0.6.3'
45 |     from .vllm_v_0_6_3.llm import LLM
46 |     from .vllm_v_0_6_3.llm import LLMEngine
47 |     from .vllm_v_0_6_3 import parallel_state
48 | else:
49 |     raise ValueError(
50 |         f'vllm version {package_version} not supported. Currently supported versions are 0.3.1, 0.4.2, 0.5.4 and 0.6.3.'
51 |     )
52 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models
15 | 
16 | from typing import Dict, Union, Optional, Iterable, Tuple
17 | 
18 | import torch
19 | import torch.nn as nn
20 | 
21 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype
22 | from vllm.model_executor.model_loader.weight_utils import default_weight_loader
23 | 
24 | 
25 | def update_hf_weight_loader():
26 |     print('no hf weight loader need to be updated')
27 |     return
28 | 
29 | 
30 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
31 |     assert isinstance(actor_weights, Dict)
32 |     with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
33 |         if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys():
34 |             del actor_weights["lm_head.weight"]
35 |         vllm_model.load_weights(actor_weights.items())
36 |     for _, module in vllm_model.named_modules():
37 |         quant_method = getattr(module, "quant_method", None)
38 |         if quant_method is not None:
39 |             quant_method.process_weights_after_loading(module)
40 |         # FIXME: Remove this after Mixtral is updated
41 |         # to use quant_method.
42 |         if hasattr(module, "process_weights_after_loading"):
43 |             module.process_weights_after_loading()
44 |     vllm_model = vllm_model.cuda()
45 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader
15 | 
16 | from typing import Dict
17 | 
18 | import torch.nn as nn
19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype
20 | 
21 | 
22 | def update_hf_weight_loader():
23 |     print("no hf weight loader need to be updated")
24 |     return
25 | 
26 | 
27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
28 |     assert isinstance(actor_weights, Dict)
29 |     with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
30 |         if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys():
31 |             del actor_weights["lm_head.weight"]
32 |         vllm_model.load_weights(actor_weights.items())
33 |     for _, module in vllm_model.named_modules():
34 |         quant_method = getattr(module, "quant_method", None)
35 |         if quant_method is not None:
36 |             quant_method.process_weights_after_loading(module)
37 |         # FIXME: Remove this after Mixtral is updated
38 |         # to use quant_method.
39 |         if hasattr(module, "process_weights_after_loading"):
40 |             module.process_weights_after_loading()
41 |     vllm_model = vllm_model.cuda()
42 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
15 | 
16 | from typing import Optional
17 | 
18 | from transformers import PreTrainedTokenizer
19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup
20 | from vllm.utils import LRUCache
21 | 
22 | 
23 | class TokenizerGroup(TokenizerGroup):
24 |     """A group of tokenizers that can be used for LoRA adapters."""
25 | 
26 |     def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int,
27 |                  max_input_length: Optional[int]):
28 |         self.enable_lora = enable_lora
29 |         self.max_input_length = max_input_length
30 |         self.tokenizer = tokenizer
31 |         self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None
32 | 
33 |     # FIXME(sgm): for simplicity, we assign the special token here
34 |     @property
35 |     def pad_token_id(self):
36 |         return self.tokenizer.pad_token_id
37 | 
38 |     @property
39 |     def eos_token_id(self):
40 |         return self.tokenizer.eos_token_id
41 | 


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
3 |   prompt_key: prompt
4 |   response_key: responses
5 |   data_source_key: data_source
6 |   reward_model_key: reward_model


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
 1 | trainer:
 2 |   nnodes: 1
 3 |   n_gpus_per_node: 8
 4 | 
 5 | data:
 6 |   path: ~/data/rlhf/math/test.parquet
 7 |   prompt_key: prompt
 8 |   n_samples: 5
 9 |   output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
10 |   batch_size: 128
11 | 
12 | model:
13 |   path: ~/models/Qwen2-7B-Instruct
14 |   external_lib: null
15 | rollout:
16 |   name: vllm
17 |   temperature: 1.0
18 |   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
19 |   top_p: 0.7
20 |   prompt_length: 1536
21 |   response_length: 512
22 |   # for vllm rollout
23 |   dtype: bfloat16 # should align with FSDP
24 |   gpu_memory_utilization: 0.5
25 |   ignore_eos: False
26 |   micro_batch_size: 256
27 |   enforce_eager: True
28 |   free_cache_engine: True
29 |   load_format: dummy_dtensor
30 |   tensor_model_parallel_size: 1
31 |   max_num_batched_tokens: 8192
32 |   max_num_seqs: 1024
33 |   log_prob_micro_batch_size: 8
34 |   # for hf rollout
35 |   do_sample: True


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   train_batch_size: 256
 3 |   micro_batch_size: 16  # this is also val batch size
 4 |   train_files: ~/data/gsm8k/train.parquet
 5 |   val_files: ~/data/gsm8k/test.parquet
 6 |   prompt_key: question
 7 |   response_key: answer
 8 |   max_length: 1024
 9 |   truncation: error
10 |   balance_dp_token: False
11 |   chat_template: null
12 | model:
13 |   partial_pretrain: ~/models/gemma-1.1-7b-it
14 |   fsdp_config:
15 |     wrap_policy:
16 |       min_num_params: 0
17 |     cpu_offload: False
18 |     offload_params: False
19 |   external_lib: null
20 |   enable_gradient_checkpointing: False
21 |   trust_remote_code: False
22 | optim:
23 |   lr: 1e-5
24 |   betas: [0.9, 0.95]
25 |   weight_decay: 0.01
26 |   warmup_steps_ratio: 0.1
27 |   clip_grad: 1.0
28 | 
29 | trainer:
30 |   default_local_dir: /tmp/sft_model
31 |   default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here
32 |   resume_path: null
33 |   project_name: gsm8k-sft
34 |   experiment_name: test
35 |   total_epochs: 4
36 |   logger: ['console']
37 |   seed: 1
38 | 
39 | 


--------------------------------------------------------------------------------
/verl/trainer/main_eval.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Offline evaluate the performance of a generated file using reward model and ground truth verifier.
16 | The input is a parquet file that contains N generated sequences and (optional) the ground truth.
17 | 
18 | """
19 | 
20 | import hydra
21 | from verl.utils.fs import copy_local_path_from_hdfs
22 | from verl.utils.reward_score import math, gsm8k, kk
23 | import pandas as pd
24 | import numpy as np
25 | 
26 | 
27 | def select_reward_fn(data_source):
28 |     if data_source == 'lighteval/MATH':
29 |         return math.compute_score
30 |     if 'kk' in data_source:
31 |         return kk.compute_score
32 |     else:
33 |         raise NotImplementedError
34 | 
35 | 
36 | @hydra.main(config_path='config', config_name='evaluation', version_base=None)
37 | def main(config):
38 |     local_path = copy_local_path_from_hdfs(config.data.path)
39 |     dataset = pd.read_parquet(local_path)
40 |     prompts = dataset[config.data.prompt_key]
41 |     responses = dataset[config.data.response_key]
42 |     data_sources = dataset[config.data.data_source_key]
43 |     reward_model_data = dataset[config.data.reward_model_key]
44 | 
45 |     passes = 0
46 | 
47 |     total = len(dataset)
48 | 
49 |     for i in range(total):
50 |         response_lst = responses[i]
51 |         data_source = data_sources[i]
52 |         # select reward score based on data_source
53 |         prompt = prompts[i]
54 |         reward_data = reward_model_data[i]
55 |         reward_fn = select_reward_fn(data_source)
56 |         ground_truth = reward_data['ground_truth']
57 |         score_lst = []
58 |         for r in response_lst:
59 |             score = reward_fn(r, ground_truth)
60 |             score_lst.append(score)
61 | 
62 |         max_score = np.max(score_lst)
63 | 
64 |         if max_score == 3:
65 |             passes += 1
66 | 
67 |     print(f'pass@5: {passes / total}')
68 | 
69 | 
70 | if __name__ == '__main__':
71 |     main()
72 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./
2 | excludes: ["/.git/"]
3 | env_vars:
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 |   VLLM_ATTENTION_BACKEND: "XFORMERS"


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import tokenizer
16 | from .tokenizer import *
17 | 
18 | __all__ = tokenizer.__all__


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from omegaconf import DictConfig
18 | 
19 | 
20 | def update_dict_with_config(dictionary: Dict, config: DictConfig):
21 |     for key in dictionary:
22 |         if hasattr(config, key):
23 |             dictionary[key] = getattr(config, key)
24 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     },
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .performance import log_gpu_memory_usage


--------------------------------------------------------------------------------
/verl/utils/debug/performance.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | import torch.distributed as dist
17 | import logging
18 | 
19 | 
20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
21 |     if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank):
22 |         memory_allocated = torch.cuda.memory_allocated() / 1024**3
23 |         memory_reserved = torch.cuda.memory_reserved() / 1024**3
24 | 
25 |         message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}'
26 | 
27 |         if logger is None:
28 |             print(message)
29 |         else:
30 |             logger.log(msg=message, level=level)
31 | 


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for distributed training."""
15 | import os
16 | 
17 | 
18 | def initialize_global_process_group(timeout_second=36000):
19 |     import torch.distributed
20 |     from datetime import timedelta
21 |     torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second))
22 |     local_rank = int(os.environ["LOCAL_RANK"])
23 |     rank = int(os.environ["RANK"])
24 |     world_size = int(os.environ["WORLD_SIZE"])
25 | 
26 |     if torch.distributed.is_initialized():
27 |         torch.cuda.set_device(local_rank)
28 |     return local_rank, rank, world_size
29 | 


--------------------------------------------------------------------------------
/verl/utils/import_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Utilities to check if packages are available.
16 | We assume package availability won't change during runtime.
17 | """
18 | 
19 | from functools import cache
20 | from typing import List
21 | 
22 | 
23 | @cache
24 | def is_megatron_core_available():
25 |     try:
26 |         from megatron.core import parallel_state as mpu
27 |         return True
28 |     except ImportError:
29 |         return False
30 | 
31 | 
32 | @cache
33 | def is_vllm_available():
34 |     try:
35 |         import vllm
36 |         return True
37 |     except ImportError:
38 |         return False
39 | 
40 | 
41 | def import_external_libs(external_libs=None):
42 |     if external_libs is None:
43 |         return
44 |     if not isinstance(external_libs, List):
45 |         external_libs = [external_libs]
46 |     import importlib
47 |     for external_lib in external_libs:
48 |         importlib.import_module(external_lib)
49 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/logger/aggregate_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | A Ray logger will receive logging info from different processes.
16 | """
17 | import numbers
18 | from typing import Dict
19 | 
20 | 
21 | def concat_dict_to_str(dict: Dict, step):
22 |     output = [f'step:{step}']
23 |     for k, v in dict.items():
24 |         if isinstance(v, numbers.Number):
25 |             output.append(f'{k}:{v:.3f}')
26 |     output_str = ' - '.join(output)
27 |     return output_str
28 | 
29 | 
30 | class LocalLogger:
31 | 
32 |     def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
33 |         self.print_to_console = print_to_console
34 |         if print_to_console:
35 |             print('Using LocalLogger is deprecated. The constructor API will change ')
36 | 
37 |     def flush(self):
38 |         pass
39 | 
40 |     def log(self, data, step):
41 |         if self.print_to_console:
42 |             print(concat_dict_to_str(data, step=step), flush=True)


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | 
17 | 
18 | def set_basic_config(level):
19 |     """
20 |     This function sets the global logging format and level. It will be called when import verl
21 |     """
22 |     logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level)
23 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | 
18 | class MemoryBuffer:
19 | 
20 |     def __init__(self, numel, numel_padded, dtype):
21 |         self.numel = numel
22 |         self.numel_padded = numel_padded
23 |         self.dtype = dtype
24 |         self.data = torch.zeros(self.numel_padded,
25 |                                 dtype=self.dtype,
26 |                                 device=torch.cuda.current_device(),
27 |                                 requires_grad=False)
28 | 
29 |     def zero(self):
30 |         """Reset the buffer to zero."""
31 |         self.data.zero_()
32 | 
33 |     def get(self, shape, start_index):
34 |         """Return a tensor with the input `shape` as a view into the
35 |         1-D data starting at `start_index`."""
36 |         end_index = start_index + shape.numel()
37 |         assert end_index <= self.numel, \
38 |             'requested tensor is out of the buffer range.'
39 |         buffer_tensor = self.data[start_index:end_index]
40 |         buffer_tensor = buffer_tensor.view(shape)
41 |         return buffer_tensor
42 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/pipeline_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | from megatron.core import parallel_state as mpu
18 | 
19 | from .sequence_parallel import pad_to_sequence_parallel
20 | 
21 | 
22 | def compute_transformers_input_shapes(batches, meta_info):
23 |     from flash_attn.bert_padding import unpad_input  # flash 2 is a must for Megatron
24 |     # pre-compute input shapes for each micro-batch at each pp stage
25 |     input_shapes = []
26 |     for model_inputs in batches:
27 |         input_ids = model_inputs['input_ids']
28 |         attention_mask = model_inputs['attention_mask']
29 |         input_ids_rmpad = unpad_input(input_ids.unsqueeze(dim=-1), attention_mask)[0]  # (total_nnz, 1)
30 |         if meta_info['sequence_parallel']:
31 |             input_ids_rmpad = pad_to_sequence_parallel(input_ids_rmpad)
32 |             # compute shapes for model_inputs
33 |             input_shapes.append(
34 |                 torch.Size([
35 |                     input_ids_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size(), 1, meta_info['hidden_size']
36 |                 ]))
37 |         else:
38 |             # compute shapes for model_inputs
39 |             input_shapes.append(torch.Size([input_ids_rmpad.shape[0], 1, meta_info['hidden_size']]))
40 |     return input_shapes
41 | 
42 | 
43 | def make_batch_generator(batches, vpp_size):
44 |     if vpp_size > 1:
45 |         # has vpp
46 |         batch_generator = [batches] * vpp_size  # number of vpp chunks
47 |         batch_generator = [iter(b) for b in batch_generator]
48 |     else:
49 |         # no vpp
50 |         batch_generator = iter(batches)
51 |     return batch_generator
52 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/sequence_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | import torch.nn.functional as F
18 | from megatron.core import parallel_state as mpu
19 | 
20 | 
21 | def mark_parameter_as_sequence_parallel(parameter):
22 |     setattr(parameter, 'sequence_parallel', True)
23 | 
24 | 
25 | def is_sequence_parallel_param(param):
26 |     return hasattr(param, 'sequence_parallel') and param.sequence_parallel
27 | 
28 | 
29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
30 |     """pad the tokens such that the total length is a multiple of sp world size
31 | 
32 |     Args:
33 |         unpad_tokens: (total_nnz, ...). Tokens after removing padding
34 | 
35 |     Returns:
36 | 
37 |     """
38 |     total_nnz = unpad_tokens.shape[0]
39 |     sp_world_size = mpu.get_tensor_model_parallel_world_size()
40 | 
41 |     if total_nnz % sp_world_size == 0:
42 |         pad_size = 0
43 |     else:
44 |         pad_size = sp_world_size - total_nnz % sp_world_size
45 | 
46 |     if pad_size > 0:
47 |         if unpad_tokens.ndim == 1:
48 |             unpad_tokens = F.pad(unpad_tokens, (0, pad_size))
49 |         elif unpad_tokens.ndim == 2:
50 |             unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size))
51 |         else:
52 |             raise NotImplementedError(f'Padding dim {unpad_tokens.ndim()} is not supported')
53 | 
54 |     return unpad_tokens
55 | 


--------------------------------------------------------------------------------
/verl/utils/py_functional.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contain small python utility functions
16 | """
17 | 
18 | from typing import Dict
19 | from types import SimpleNamespace
20 | 
21 | 
22 | def union_two_dict(dict1: Dict, dict2: Dict):
23 |     """Union two dict. Will throw an error if there is an item not the same object with the same key.
24 | 
25 |     Args:
26 |         dict1:
27 |         dict2:
28 | 
29 |     Returns:
30 | 
31 |     """
32 |     for key, val in dict2.items():
33 |         if key in dict1:
34 |             assert dict2[key] == dict1[key], \
35 |                 f'{key} in meta_dict1 and meta_dict2 are not the same object'
36 |         dict1[key] = val
37 | 
38 |     return dict1
39 | 
40 | 
41 | def append_to_dict(data: Dict, new_data: Dict):
42 |     for key, val in new_data.items():
43 |         if key not in data:
44 |             data[key] = []
45 |         data[key].append(val)
46 | 
47 | 
48 | class NestedNamespace(SimpleNamespace):
49 | 
50 |     def __init__(self, dictionary, **kwargs):
51 |         super().__init__(**kwargs)
52 |         for key, value in dictionary.items():
53 |             if isinstance(value, dict):
54 |                 self.__setattr__(key, NestedNamespace(value))
55 |             else:
56 |                 self.__setattr__(key, value)
57 | 


--------------------------------------------------------------------------------
/verl/utils/ray_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contains commonly used utilities for ray
16 | """
17 | 
18 | import ray
19 | 
20 | import concurrent.futures
21 | 
22 | 
23 | def parallel_put(data_list, max_workers=None):
24 | 
25 |     def put_data(index, data):
26 |         return index, ray.put(data)
27 | 
28 |     if max_workers is None:
29 |         max_workers = min(len(data_list), 16)
30 | 
31 |     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
32 |         data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)]
33 |         res_lst = []
34 |         for future in concurrent.futures.as_completed(data_list_f):
35 |             res_lst.append(future.result())
36 | 
37 |         # reorder based on index
38 |         output = [None for _ in range(len(data_list))]
39 |         for res in res_lst:
40 |             index, data_ref = res
41 |             output[index] = data_ref
42 | 
43 |     return output
44 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/gsm8k.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import re
16 | 
17 | 
18 | def extract_solution(solution_str, method='strict'):
19 |     assert method in ['strict', 'flexible']
20 | 
21 |     if method == 'strict':
22 |         # this also tests the formatting of the model
23 |         solution = re.search("#### (\\-?[0-9\\.\\,]+)", solution_str)
24 |         if solution is None:
25 |             final_answer = None
26 |         else:
27 |             final_answer = solution.group(0)
28 |             final_answer = final_answer.split('#### ')[1].replace(',', '').replace('$', '')
29 |     elif method == 'flexible':
30 |         answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str)
31 |         final_answer = None
32 |         if len(answer) == 0:
33 |             # no reward is there is no answer
34 |             pass
35 |         else:
36 |             invalid_str = ['', '.']
37 |             # find the last number that is not '.'
38 |             for final_answer in reversed(answer):
39 |                 if final_answer not in invalid_str:
40 |                     break
41 |     return final_answer
42 | 
43 | 
44 | def compute_score(solution_str, ground_truth, method='strict', format_score=0., score=1.):
45 |     """The scoring function for GSM8k.
46 | 
47 |     Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.
48 | 
49 |     Args:
50 |         solution_str: the solution text
51 |         ground_truth: the ground truth
52 |         method: the method to extract the solution, choices are 'strict' and 'flexible'
53 |         format_score: the score for the format
54 |         score: the score for the correct answer
55 |     """
56 |     answer = extract_solution(solution_str=solution_str, method=method)
57 |     if answer is None:
58 |         return 0
59 |     else:
60 |         if answer == ground_truth:
61 |             return score
62 |         else:
63 |             return format_score


--------------------------------------------------------------------------------
/verl/utils/reward_score/multiply.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import random
 3 | 
 4 | 
 5 | def extract_solution(solution_str):
 6 |     # Remove everything before the first "Assistant:"
 7 |     if "Assistant:" in solution_str:
 8 |         solution_str = solution_str.split("Assistant:", 1)[1]
 9 |     else:
10 |         return None
11 | 
12 |     answer_pattern = r'<answer>(.*?)</answer>'
13 |     match = re.finditer(answer_pattern, solution_str)
14 |     matches = list(match)
15 |     if matches:
16 |         final_answer = matches[-1].group(1).strip()
17 |     else:
18 |         final_answer = None
19 |     if final_answer is not None:
20 |         try:
21 |             int_final_answer = int(final_answer)
22 |         except ValueError:
23 |             final_answer = None
24 |     return final_answer
25 | 
26 | 
27 | def compute_score(solution_str, ground_truth, method='strict', format_score=0.1, score=1.):
28 |     """The scoring function for GSM8k.
29 | 
30 |     Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.
31 | 
32 |     Args:
33 |         solution_str: the solution text
34 |         ground_truth: the ground truth
35 |         method: the method to extract the solution, choices are 'strict' and 'flexible'
36 |         format_score: the score for the format
37 |         score: the score for the correct answer
38 |     """
39 |     answer = extract_solution(solution_str=solution_str)
40 |     do_print = random.randint(1, 64) == 1
41 |     if do_print:
42 |         print(f"--------------------------------")
43 |         print(f"Ground truth: {ground_truth} | Extracted answer: {answer}")
44 |         print(f"Solution string: {solution_str}")
45 | 
46 |     if answer is None:
47 |         if do_print:
48 |             print(f"No answer found")
49 |         return 0
50 |     else:
51 |         if int(answer) == int(ground_truth):
52 |             if do_print:
53 |                 print(f"Correct answer: {answer}")
54 |             return score
55 |         else:
56 |             if do_print:
57 |                 print(f"Incorrect answer {answer} | Ground truth: {ground_truth}")
58 |             return format_score
59 | 


--------------------------------------------------------------------------------
/verl/utils/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utils for tokenization."""
15 | import warnings
16 | 
17 | __all__ = ['hf_tokenizer']
18 | 
19 | 
20 | def set_pad_token_id(tokenizer):
21 |     """Set pad_token_id to eos_token_id if it is None.
22 | 
23 |     Args:
24 |         tokenizer (transformers.PreTrainedTokenizer): The tokenizer to be set.
25 | 
26 |     """
27 |     if tokenizer.pad_token_id is None:
28 |         tokenizer.pad_token_id = tokenizer.eos_token_id
29 |         warnings.warn(f'tokenizer.pad_token_id is None. Now set to {tokenizer.eos_token_id}')
30 |     if tokenizer.pad_token is None:
31 |         tokenizer.pad_token = tokenizer.eos_token
32 |         warnings.warn(f'tokenizer.pad_token is None. Now set to {tokenizer.eos_token}')
33 | 
34 | 
35 | def hf_tokenizer(name_or_path, correct_pad_token=True, correct_gemma2=True, **kwargs):
36 |     """Create a huggingface pretrained tokenizer.
37 | 
38 |     Args:
39 |         name (str): The name of the tokenizer.
40 |         correct_pad_token (bool): Whether to correct the pad token id.
41 |         correct_gemma2 (bool): Whether to correct the gemma2 tokenizer.
42 |         **kwargs: The keyword arguments for the tokenizer.
43 | 
44 |     Returns:
45 |         transformers.PreTrainedTokenizer: The pretrained tokenizer.
46 | 
47 |     """
48 |     from transformers import AutoTokenizer
49 |     if correct_gemma2 and isinstance(name_or_path, str) and 'gemma-2-2b-it' in name_or_path:
50 |         # the EOS token in gemma2 is ambiguious, which may worsen RL performance.
51 |         # https://huggingface.co/google/gemma-2-2b-it/commit/17a01657f5c87135bcdd0ec7abb4b2dece04408a
52 |         warnings.warn('Found gemma-2-2b-it tokenizer. Set eos_token and eos_token_id to <end_of_turn> and 107.')
53 |         kwargs['eos_token'] = '<end_of_turn>'
54 |         kwargs['eos_token_id'] = 107
55 |     tokenizer = AutoTokenizer.from_pretrained(name_or_path, **kwargs)
56 |     if correct_pad_token:
57 |         set_pad_token_id(tokenizer)
58 |     return tokenizer


--------------------------------------------------------------------------------
/verl/utils/torch_dtypes.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Adapted from Cruise.
16 | """
17 | 
18 | import torch
19 | 
20 | from typing import Union
21 | 
22 | HALF_LIST = [16, "16", "fp16", "float16"]
23 | FLOAT_LIST = [32, "32", "fp32", "float32"]
24 | BFLOAT_LIST = ["bf16", "bfloat16"]
25 | 
26 | 
27 | class PrecisionType(object):
28 |     """Type of precision used.
29 | 
30 |     >>> PrecisionType.HALF == 16
31 |     True
32 |     >>> PrecisionType.HALF in (16, "16")
33 |     True
34 |     """
35 | 
36 |     HALF = "16"
37 |     FLOAT = "32"
38 |     FULL = "64"
39 |     BFLOAT = "bf16"
40 |     MIXED = "mixed"
41 | 
42 |     @staticmethod
43 |     def supported_type(precision: Union[str, int]) -> bool:
44 |         return any(x == precision for x in PrecisionType)
45 | 
46 |     @staticmethod
47 |     def supported_types() -> list[str]:
48 |         return [x.value for x in PrecisionType]
49 | 
50 |     @staticmethod
51 |     def is_fp16(precision):
52 |         return precision in HALF_LIST
53 | 
54 |     @staticmethod
55 |     def is_fp32(precision):
56 |         return precision in FLOAT_LIST
57 | 
58 |     @staticmethod
59 |     def is_bf16(precision):
60 |         return precision in BFLOAT_LIST
61 | 
62 |     @staticmethod
63 |     def to_dtype(precision):
64 |         if precision in HALF_LIST:
65 |             return torch.float16
66 |         elif precision in FLOAT_LIST:
67 |             return torch.float32
68 |         elif precision in BFLOAT_LIST:
69 |             return torch.bfloat16
70 |         else:
71 |             raise RuntimeError(f"unexpected precision: {precision}")
72 | 
73 |     @staticmethod
74 |     def to_str(precision):
75 |         if precision == torch.float16:
76 |             return 'fp16'
77 |         elif precision == torch.float32:
78 |             return 'fp32'
79 |         elif precision == torch.bfloat16:
80 |             return 'bf16'
81 |         else:
82 |             raise RuntimeError(f"unexpected precision: {precision}")
83 | 


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.1


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/actor/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for Actor
16 | """
17 | from abc import ABC, abstractmethod
18 | from typing import Iterable, Dict
19 | 
20 | from verl import DataProto
21 | import torch
22 | 
23 | __all__ = ['BasePPOActor']
24 | 
25 | 
26 | class BasePPOActor(ABC):
27 | 
28 |     def __init__(self, config):
29 |         """The base class for PPO actor
30 | 
31 |         Args:
32 |             config (DictConfig): a config passed to the PPOActor. We expect the type to be
33 |                 DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general.
34 |         """
35 |         super().__init__()
36 |         self.config = config
37 | 
38 |     @abstractmethod
39 |     def compute_log_prob(self, data: DataProto) -> torch.Tensor:
40 |         """Compute logits given a batch of data.
41 | 
42 |         Args:
43 |             data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```,
44 |                 ```attention_mask``` and ```position_ids```.
45 | 
46 |         Returns:
47 |             DataProto: a DataProto containing the key ```log_probs```
48 | 
49 | 
50 |         """
51 |         pass
52 | 
53 |     @abstractmethod
54 |     def update_policy(self, data: DataProto) -> Dict:
55 |         """Update the policy with an iterator of DataProto
56 | 
57 |         Args:
58 |             data (DataProto): an iterator over the DataProto that returns by
59 |                 ```make_minibatch_iterator```
60 | 
61 |         Returns:
62 |             Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model
63 |             such as ```loss```, ```grad_norm```, etc,.
64 | 
65 |         """
66 |         pass
67 | 


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | from abc import ABC, abstractmethod
18 | 
19 | import torch
20 | 
21 | from verl import DataProto
22 | 
23 | __all__ = ['BasePPOCritic']
24 | 
25 | 
26 | class BasePPOCritic(ABC):
27 | 
28 |     def __init__(self, config):
29 |         super().__init__()
30 |         self.config = config
31 | 
32 |     @abstractmethod
33 |     def compute_values(self, data: DataProto) -> torch.Tensor:
34 |         """Compute values"""
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def update_critic(self, data: DataProto):
39 |         """Update the critic"""
40 |         pass
41 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for reward model
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | from verl import DataProto
21 | 
22 | 
23 | class BasePPORewardModel(ABC):
24 | 
25 |     def __init__(self, config):
26 |         self.config = config
27 | 
28 |     @abstractmethod
29 |     def compute_reward(self, data: DataProto) -> DataProto:
30 |         """Computing reward given input_ids. The transformers should output a tensor with shape
31 |            [batch_size, sequence_length], and the value at [EOS] mask should be gathered.
32 | 
33 |         Args:
34 |             data: must contain keys "input_ids", "attention_mask" and "position_ids".
35 |                 - input_ids: [batch_size, sequence_length]
36 |                 - attention_mask: [batch_size, sequence_length]
37 |                 - position_ids: [batch_size, sequence_length]
38 | 
39 |         Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
40 |             Other position should have zero reward. Note that this may change in the future if we use
41 |             dense reward. So, we leave the interface for general case.
42 |             - reward: [batch_size, sequence_length].
43 | 
44 |         """
45 |         pass
46 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .naive import NaiveRollout
17 | from .hf_rollout import HFRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import Iterable, Union
17 | 
18 | from verl import DataProto
19 | 
20 | __all__ = ['BaseRollout']
21 | 
22 | 
23 | class BaseRollout(ABC):
24 | 
25 |     def __init__(self):
26 |         """
27 | 
28 |         Args:
29 |             dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader
30 |             should handle when the training stops.
31 |         """
32 |         super().__init__()
33 | 
34 |     @abstractmethod
35 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
36 |         """Generate sequences"""
37 |         pass
38 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .vllm_rollout import vLLMRollout


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available
16 | 
17 | from .base import BaseShardingManager
18 | from .fsdp_ulysses import FSDPUlyssesShardingManager
19 | 
20 | AllGatherPPModel = None
21 | 
22 | if is_megatron_core_available() and is_vllm_available():
23 |     from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
24 | elif AllGatherPPModel is not None:
25 |     pass
26 | else:
27 |     AllGatherPPModel = None
28 |     MegatronVLLMShardingManager = None
29 | 
30 | if is_vllm_available():
31 |     from .fsdp_vllm import FSDPVLLMShardingManager
32 | else:
33 |     FSDPVLLMShardingManager = None
34 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 | 
23 |     def __enter__(self):
24 |         pass
25 | 
26 |     def __exit__(self, exc_type, exc_value, traceback):
27 |         pass
28 | 
29 |     def preprocess_data(self, data: DataProto) -> DataProto:
30 |         return data
31 | 
32 |     def postprocess_data(self, data: DataProto) -> DataProto:
33 |         return data
34 | 


--------------------------------------------------------------------------------