├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── dataset.yml
    │   ├── e2e_ascend.yml
    │   ├── e2e_dapo.yml
    │   ├── e2e_eval_aime24.yml
    │   ├── e2e_ppo_trainer.yml
    │   ├── e2e_ppo_trainer_megatron.yml
    │   ├── e2e_prime.yml
    │   ├── e2e_sft.yml
    │   ├── model.yml
    │   ├── pre-commit-full.yml
    │   ├── pre-commit.yml
    │   ├── ray_test.yml
    │   ├── sandbox.yml
    │   ├── sanity.yml
    │   ├── scorecard.yml
    │   ├── secrets_scan.yml
    │   └── vllm.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .vscode
    └── settings.json
├── LICENSE
├── Notice.txt
├── README.md
├── docker
    ├── Apptainerfile.rocm
    ├── Dockerfile.megatron
    ├── Dockerfile.ngc.vllm
    ├── Dockerfile.ngc.vllm0.8
    ├── Dockerfile.ngc.vllm0.8.sagemaker
    ├── Dockerfile.rocm
    ├── Dockerfile.sglang
    └── Dockerfile.vemlp.vllm.te
├── docs
    ├── Makefile
    ├── README.md
    ├── README_verl_official.md
    ├── README_vllm0.7.md
    ├── README_vllm0.8.md
    ├── _static
    │   └── logo.png
    ├── accuracy_comparison.svg
    ├── advance
    │   ├── checkpoint.rst
    │   ├── dpo_extension.rst
    │   ├── fsdp_extension.rst
    │   ├── megatron_extension.rst
    │   └── placement.rst
    ├── amd_tutorial
    │   └── amd_build_dockerfile_page.rst
    ├── bar.png
    ├── conf.py
    ├── data.rst
    ├── examples
    │   ├── config.rst
    │   ├── gsm8k_example.rst
    │   └── ppo_code_architecture.rst
    ├── experiment
    │   └── ppo.rst
    ├── faq
    │   └── faq.rst
    ├── fig1.svg
    ├── fig1_sc2.png
    ├── fig1_screenshot.png
    ├── fig2.png
    ├── fig_finding1.svg
    ├── hybrid_flow.rst
    ├── index.rst
    ├── logo-deepeyes.jpg
    ├── perf
    │   ├── device_tuning.rst
    │   └── perf_tuning.rst
    ├── preparation
    │   ├── prepare_data.rst
    │   └── reward_function.rst
    ├── requirements-docs.txt
    ├── start
    │   ├── install.rst
    │   ├── multinode.rst
    │   └── quickstart.rst
    └── workers
    │   ├── fsdp_workers.rst
    │   ├── megatron_workers.rst
    │   ├── ray_trainer.rst
    │   └── sglang_worker.rst
├── eval
    ├── EVALUATION.md
    ├── eval_hrbench.py
    ├── eval_vstar.py
    ├── judge_result.py
    ├── judge_result_hrbench.py
    └── watch_demo.ipynb
├── examples
    ├── agent
    │   ├── eval_rag_results.sh
    │   ├── final_merged_v1v8_thinklite.sh
    │   ├── final_merged_v1v8_thinklite_32b.sh
    │   ├── train_grpo_mm.sh
    │   ├── train_grpo_rag_v2.sh
    │   ├── train_grpo_vlagent.sh
    │   ├── train_grpo_vlagent_v2.sh
    │   ├── train_grpo_vlagent_v3.sh
    │   ├── train_mm_search.sh
    │   ├── train_ppo_32b_rag_v2.sh
    │   ├── train_ppo_frozenlake.sh
    │   ├── train_ppo_frozenlake_7b.sh
    │   ├── train_ppo_frozenlake_debug.sh
    │   ├── train_ppo_rag.sh
    │   └── train_ppo_rag_v2.sh
    ├── checkpoint
    │   ├── run_deepseek_megatron_ckpt.sh
    │   └── run_qwen_megatron_ckpt.sh
    ├── data_preprocess
    │   ├── full_hh_rlhf.py
    │   ├── geo3k.py
    │   ├── gsm8k.py
    │   ├── hellaswag.py
    │   ├── math_dataset.py
    │   └── multiturn.py
    ├── generation
    │   ├── run_deepseek7b_mutli_node.sh
    │   └── run_deepseek_v2_lite_math.sh
    ├── grpo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_math.sh
    │   ├── run_deepseek7b_llm_math_megatron.sh
    │   ├── run_deepseek7b_llm_megatron.sh
    │   ├── run_deepseek7b_llm_seq_balance.sh
    │   ├── run_qwen2-7b.sh
    │   ├── run_qwen2-7b_math.sh
    │   ├── run_qwen2-7b_math_megatron.sh
    │   ├── run_qwen2-7b_megatron.sh
    │   ├── run_qwen2-7b_seq_balance.sh
    │   └── run_qwen2_5_vl-7b.sh
    ├── ppo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_modelscope.sh
    │   ├── run_deepseek7b_llm_sp2.sh
    │   ├── run_deepseek_full_hh_rlhf.sh
    │   ├── run_deepseek_math_gsm8k_megatron.sh
    │   ├── run_deepseek_megatron.sh
    │   ├── run_gemma.sh
    │   ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh
    │   ├── run_qwen2-7b_math_gsm8k_megatron.sh
    │   ├── run_qwen2-7b_megatron.sh
    │   ├── run_qwen2-7b_rm.sh
    │   ├── run_qwen2-7b_rm_seq_balance.sh
    │   ├── run_qwen2-7b_seq_balance.sh
    │   ├── run_qwen2-7b_sglang_seq_balance.sh
    │   ├── run_qwen2.5-32b.sh
    │   └── verl_getting_started.ipynb
    ├── ray
    │   └── tutorial.ipynb
    ├── reinforce_plus_plus_trainer
    │   ├── run_qwen2-7b_math_rf.sh
    │   └── run_qwen2-7b_math_rf_baseline.sh
    ├── remax_trainer
    │   ├── run_qwen2.5-3b_seq_balance.sh
    │   └── run_qwen2.5-7b_seq_balance.sh
    ├── rloo_trainer
    │   └── run_qwen2-7b.sh
    ├── sft
    │   ├── gsm8k
    │   │   ├── run_deepseek_6b7.sh
    │   │   ├── run_gemma_2b.sh
    │   │   ├── run_gemma_7b.sh
    │   │   ├── run_qwen_05_peft.sh
    │   │   ├── run_qwen_05_sp2.sh
    │   │   └── run_qwen_05_sp2_liger.sh
    │   └── multiturn
    │   │   └── run_qwen_05_sp2.sh
    ├── slurm
    │   └── ray_on_slurm.slurm
    ├── split_placement
    │   ├── README.md
    │   ├── config
    │   │   └── ppo_trainer_split.yaml
    │   ├── main_ppo_split.py
    │   ├── run_deepseek7b_llm.sh
    │   └── split_monkey_patch.py
    └── tuning
    │   ├── 14b
    │       └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh
    │   ├── 70b
    │       ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh
    │       └── qwen2-70b_grpo_32_h800_fsdp_vllm.sh
    │   └── 7b
    │       └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh
├── patches
    └── megatron_v4.patch
├── pyproject.toml
├── recipe
    ├── dapo
    │   ├── README.md
    │   ├── prepare_dapo_data.sh
    │   ├── run_dapo_early_qwen2.5_32b.sh
    │   ├── run_dapo_qwen2.5_32b.sh
    │   ├── src
    │   │   ├── config
    │   │   │   └── dapo_trainer.yaml
    │   │   ├── dapo_ray_trainer.py
    │   │   └── main_dapo.py
    │   └── test_dapo_7b.sh
    ├── drgrpo
    │   └── README.md
    ├── prime
    │   ├── __init__.py
    │   ├── config
    │   │   └── prime_trainer.yaml
    │   ├── main_prime.py
    │   ├── prime_core_algos.py
    │   ├── prime_dp_rm.py
    │   ├── prime_fsdp_workers.py
    │   ├── prime_ray_trainer.py
    │   └── run_prime_qwen.sh
    └── r1
    │   ├── README.md
    │   ├── __init__.py
    │   ├── config
    │       └── evaluation.yaml
    │   ├── data_process.py
    │   ├── main_eval.py
    │   ├── reward_score.py
    │   ├── run_r1_distill_qwen.sh
    │   └── tasks
    │       ├── __init__.py
    │       ├── gpqa.py
    │       ├── livecodebench.py
    │       └── math.py
├── requirements.txt
├── requirements_deepeyes.txt
├── requirements_sglang.txt
├── scripts
    ├── converter_hf_to_mcore.py
    ├── diagnose.py
    ├── install_deepeyes.sh
    └── model_merger.py
├── setup.py
├── tests
    ├── __init__.py
    ├── checkpoint
    │   └── test_fsdp_ckpt.py
    ├── distributed
    │   ├── run_all.sh
    │   └── test_tensor_dict.py
    ├── e2e
    │   ├── __init__.py
    │   ├── arithmetic_sequence
    │   │   ├── data
    │   │   │   ├── create_dataset.py
    │   │   │   ├── test.parquet
    │   │   │   └── train.parquet
    │   │   ├── model
    │   │   │   ├── config.json
    │   │   │   ├── create_model_tokenizer.py
    │   │   │   ├── generation_config.json
    │   │   │   ├── model.safetensors
    │   │   │   └── tokenizer_config.json
    │   │   └── rl
    │   │   │   ├── README.md
    │   │   │   └── main_trainer.py
    │   ├── check_custom_rwd_fn.py
    │   ├── check_results.py
    │   ├── envs
    │   │   ├── __init__.py
    │   │   └── digit_completion
    │   │   │   ├── __init__.py
    │   │   │   ├── task.py
    │   │   │   └── tokenizer.py
    │   ├── ppo_trainer
    │   │   ├── run_function_reward.sh
    │   │   └── run_model_reward.sh
    │   ├── run_dapo.sh
    │   ├── run_ppo_trainer_megatron.sh
    │   ├── run_prime.sh
    │   ├── run_r1_distill_qwen_aime24_eval.sh
    │   ├── run_ray_trainer.sh
    │   ├── run_ray_trainer_fire_sampling.sh
    │   ├── run_ray_trainer_rmpad.sh
    │   ├── run_test.sh
    │   └── sft
    │   │   ├── run_sft.sh
    │   │   └── test_sp_loss_match.py
    ├── generation
    │   └── run_gen_qwen05.sh
    ├── gpu_utility
    │   ├── test_memory_buffers.py
    │   ├── test_ops.py
    │   └── test_torch_functional.py
    ├── kill_github_tests.sh
    ├── model
    │   ├── test_transformer.py
    │   └── test_transformers_ulysses.py
    ├── ray
    │   ├── check_worker_alive
    │   │   └── main.py
    │   ├── detached_worker
    │   │   ├── README.md
    │   │   ├── client.py
    │   │   ├── run.sh
    │   │   └── server.py
    │   ├── test_check_worker_alive.py
    │   ├── test_colocated_workers.py
    │   ├── test_data_transfer.py
    │   ├── test_driverfunc_to_worker.py
    │   ├── test_high_level_scheduling_api.py
    │   ├── test_ray_local_envs.py
    │   ├── test_rvdz.py
    │   ├── test_worker_group_basics.py
    │   └── test_worker_group_torch.py
    ├── rollout
    │   ├── run_fsdp_vllm.py
    │   ├── test_sglang_spmd.py
    │   ├── test_vllm_hf_loader.py
    │   └── test_vllm_spmd.py
    ├── sandbox
    │   └── test_sandbox.py
    ├── sanity
    │   ├── check_license.py
    │   └── test_import.py
    ├── utility
    │   └── test_tensor_dict_utilities.py
    └── verl
    │   └── utils
    │       ├── dataset
    │           ├── test_multiturn_sft_dataset.py
    │           ├── test_rl_dataset.py
    │           ├── test_rm_dataset.py
    │           └── test_sft_dataset.py
    │       ├── test_import_utils.py
    │       └── test_module.py
└── verl
    ├── __init__.py
    ├── models
        ├── README.md
        ├── __init__.py
        ├── llama
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── llama_loader.py
        │   │       ├── llama_loader_depracated.py
        │   │       └── llama_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_llama_megatron.py
        ├── mcore
        │   ├── __init__.py
        │   ├── config_converter.py
        │   ├── loader.py
        │   ├── model_forward.py
        │   ├── model_initializer.py
        │   ├── readme.md
        │   ├── registry.py
        │   ├── saver.py
        │   ├── util.py
        │   └── weight_converter.py
        ├── qwen2
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── qwen2_loader.py
        │   │       ├── qwen2_loader_depracated.py
        │   │       └── qwen2_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_qwen2_megatron.py
        ├── registry.py
        ├── transformers
        │   ├── __init__.py
        │   ├── llama.py
        │   ├── monkey_patch.py
        │   ├── qwen2.py
        │   └── qwen2_vl.py
        └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller
        ├── __init__.py
        ├── base
        │   ├── __init__.py
        │   ├── decorator.py
        │   ├── megatron
        │   │   ├── __init__.py
        │   │   ├── worker.py
        │   │   └── worker_group.py
        │   ├── register_center
        │   │   ├── __init__.py
        │   │   └── ray.py
        │   ├── worker.py
        │   └── worker_group.py
        └── ray
        │   ├── __init__.py
        │   ├── base.py
        │   └── megatron.py
    ├── third_party
        ├── __init__.py
        ├── sglang
        │   ├── __init__.py
        │   └── parallel_state.py
        └── vllm
        │   ├── __init__.py
        │   ├── vllm_v_0_5_4
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   └── vllm_v_0_6_3
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
    ├── trainer
        ├── __init__.py
        ├── config
        │   ├── evaluation.yaml
        │   ├── generation.yaml
        │   ├── ppo_megatron_trainer.yaml
        │   ├── ppo_trainer.yaml
        │   └── sft_trainer.yaml
        ├── fsdp_sft_trainer.py
        ├── main_eval.py
        ├── main_generation.py
        ├── main_ppo.py
        ├── ppo
        │   ├── __init__.py
        │   ├── core_algos.py
        │   ├── metric_utils.py
        │   └── ray_trainer.py
        └── runtime_env.yaml
    ├── utils
        ├── __init__.py
        ├── checkpoint
        │   ├── __init__.py
        │   ├── checkpoint_manager.py
        │   ├── fsdp_checkpoint_manager.py
        │   └── megatron_checkpoint_manager.py
        ├── config.py
        ├── dataset
        │   ├── README.md
        │   ├── __init__.py
        │   ├── multiturn_sft_dataset.py
        │   ├── rl_dataset.py
        │   ├── rm_dataset.py
        │   ├── sft_dataset.py
        │   └── vision_utils.py
        ├── debug
        │   ├── __init__.py
        │   ├── performance.py
        │   └── trajectory_tracker.py
        ├── distributed.py
        ├── flops_counter.py
        ├── fs.py
        ├── fsdp_utils.py
        ├── hdfs_io.py
        ├── import_utils.py
        ├── logger
        │   ├── __init__.py
        │   └── aggregate_logger.py
        ├── logging_utils.py
        ├── megatron
        │   ├── __init__.py
        │   ├── memory.py
        │   ├── optimizer.py
        │   ├── pipeline_parallel.py
        │   ├── sequence_parallel.py
        │   └── tensor_parallel.py
        ├── megatron_utils.py
        ├── memory_buffer.py
        ├── model.py
        ├── py_functional.py
        ├── ray_utils.py
        ├── rendezvous
        │   ├── __init__.py
        │   └── ray_backend.py
        ├── reward_score
        │   ├── __init__.py
        │   ├── agent.py
        │   ├── geo3k.py
        │   ├── gsm8k.py
        │   ├── math.py
        │   ├── math_batch.py
        │   ├── math_dapo.py
        │   ├── math_verify.py
        │   ├── prime_code
        │   │   ├── __init__.py
        │   │   ├── testing_util.py
        │   │   └── utils.py
        │   ├── prime_math
        │   │   ├── __init__.py
        │   │   ├── grader.py
        │   │   └── math_normalize.py
        │   └── vl_agent.py
        ├── rl_logging_board_utils.py
        ├── seqlen_balancing.py
        ├── tensorboard_utils.py
        ├── tokenizer.py
        ├── torch_dtypes.py
        ├── torch_functional.py
        ├── tracking.py
        └── ulysses.py
    ├── version
        └── version
    └── workers
        ├── __init__.py
        ├── actor
            ├── __init__.py
            ├── base.py
            ├── dp_actor.py
            └── megatron_actor.py
        ├── agent
            ├── __init__.py
            ├── envs
            │   ├── ENV_README.md
            │   ├── __init__.py
            │   ├── frozenlake
            │   │   ├── __init__.py
            │   │   ├── create_dataset.py
            │   │   └── frozenlake.py
            │   ├── mm_process_engine
            │   │   ├── __init__.py
            │   │   ├── crop_engine.py
            │   │   ├── prompt.py
            │   │   ├── visual_toolbox.py
            │   │   ├── visual_toolbox_v2.py
            │   │   ├── visual_toolbox_v3.py
            │   │   ├── visual_toolbox_v4.py
            │   │   └── visual_toolbox_v5.py
            │   ├── rag_engine
            │   │   ├── __init__.py
            │   │   ├── generate_testset.py
            │   │   ├── generate_trainset.py
            │   │   ├── rag_engine.py
            │   │   ├── rag_engine_v2.py
            │   │   └── test_client.py
            │   ├── sokoban
            │   │   ├── __init__.py
            │   │   └── sokoban.py
            │   └── visual_agent
            │   │   ├── __init__.py
            │   │   ├── generate_testset.py
            │   │   ├── generate_trainset.py
            │   │   ├── mm_search_engine.py
            │   │   ├── vl_agent_v1.py
            │   │   ├── vl_agent_v2.py
            │   │   └── vl_agent_v3.py
            ├── parallel_env.py
            └── tool_envs.py
        ├── critic
            ├── __init__.py
            ├── base.py
            ├── dp_critic.py
            └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_manager
            ├── __init__.py
            ├── batch.py
            ├── dapo.py
            ├── naive.py
            └── prime.py
        ├── reward_model
            ├── __init__.py
            ├── base.py
            └── megatron
            │   ├── __init__.py
            │   └── reward_model.py
        ├── rollout
            ├── __init__.py
            ├── base.py
            ├── hf_rollout.py
            ├── naive
            │   ├── __init__.py
            │   └── naive_rollout.py
            ├── sglang_rollout
            │   ├── __init__.py
            │   └── sglang_rollout.py
            ├── tokenizer.py
            └── vllm_rollout
            │   ├── __init__.py
            │   ├── fire_vllm_rollout.py
            │   ├── vllm_rollout.py
            │   └── vllm_rollout_spmd.py
        └── sharding_manager
            ├── __init__.py
            ├── base.py
            ├── fsdp_sglang.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            ├── megatron_vllm.py
            └── patch
                ├── __init__.py
                └── fsdp_vllm_patch.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | ## Enabled the dependabot to check the dependencies of the project
2 | ## Dependabot will open pull requests to update dependencies automatically
3 | 
4 | version: 2
5 | updates:
6 |   - package-ecosystem: pip
7 |     directory: "/"
8 |     schedule:
9 |       interval: weekly


--------------------------------------------------------------------------------
/.github/workflows/e2e_ascend.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_ascend
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |     paths:
11 |       - "**/*.py"
12 |       - .github/workflows/e2e_ascend.yml
13 | 
14 | permissions:
15 |   contents: read
16 |   
17 | jobs:
18 |   test:
19 |     name: verl Ascend test (self-host)
20 |     runs-on: [self-hosted, npu-0]
21 |     timeout-minutes: 5 # Increase this timeout value as needed
22 |     container:
23 |       image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
24 |       volumes:
25 |         - /usr/local/dcmi:/usr/local/dcmi
26 |         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
27 |         - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
28 |         # Use self-host cache speed up pip and model download
29 |         # - /home/action/actions-runner/_work/cache:/github/home/.cache/
30 |       options: >-
31 |         --device /dev/davinci0
32 |         --device /dev/davinci_manager
33 |         --device /dev/devmm_svm
34 |         --device /dev/hisi_hdc
35 |         --privileged
36 |         --network "host"
37 |     steps:
38 |       - name: Check npu and CANN info
39 |         run: |
40 |           cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
41 |           npu-smi info
42 |       - name: Checkout volcengine/verl repo
43 |         uses: actions/checkout@v4
44 |       - name: Run test
45 |         run: |
46 |           lscpu
47 | 


--------------------------------------------------------------------------------
/.github/workflows/e2e_dapo.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_dapo
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - v0.*
11 |     paths:
12 |       - "**/*.py"
13 |       # Home
14 |       - "recipe/dapo/src"
15 |       # Entrypoints
16 |       - ".github/workflows/e2e_dapo.yml"
17 |       - "examples/data_preprocess/gsm8k.py"
18 |       - "tests/e2e/run_dapo.sh"
19 |       - "!examples"
20 |       - "!verl/trainer/main_*.py"
21 |       - "!verl/trainer/fsdp_sft_trainer.py"
22 |       # Megatron
23 |       - "!verl/workers/**/megatron_*.py"
24 | 
25 | 
26 | # Declare permissions just read content.
27 | permissions:
28 |   contents: read
29 | 
30 | jobs:
31 |   e2e_dapo:
32 |     runs-on: [L20x8]
33 |     timeout-minutes: 40 # Increase this timeout value as needed
34 |     env:
35 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
36 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
37 |       NO_PROXY: "localhost,127.0.0.1"
38 |       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
39 |     container:
40 |       image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
41 |       options: --gpus all --shm-size=10g
42 |     steps:
43 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
44 |         with:
45 |             fetch-depth: 0
46 |       - name: Install the current repository
47 |         run: |
48 |           pip3 install -e .[test,gpu]
49 |       - name: Prepare GSM8K dataset
50 |         run: |
51 |           python3 examples/data_preprocess/gsm8k.py
52 |       - name: Running the E2E test with the DAPO algorithm
53 |         run: | 
54 |           ray stop --force
55 |           bash tests/e2e/run_dapo.sh


--------------------------------------------------------------------------------
/.github/workflows/e2e_eval_aime24.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_eval_aime24
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |     paths:
11 |       - "**/*.py"
12 |       # Home
13 |       - "recipe/r1"
14 |       - "!recipe/r1/README.md"
15 |       # Entrypoints
16 |       - ".github/workflows/e2e_eval_aime24.yml"
17 |       - "tests/e2e/run_r1_distill_qwen_aime24_eval.sh"
18 |       - "verl/trainer/main_generation.py"
19 |       - "verl/trainer/config/generation.yaml"
20 |       - "!examples"
21 |       - "!verl/trainer/main_*.py"
22 |       - "!verl/trainer/fsdp_sft_trainer.py"
23 |       # Other recipes
24 |       - "!recipe"
25 | 
26 | # Declare permissions just read content.
27 | permissions:
28 |   contents: read
29 | 
30 | jobs:
31 |   e2e_eval_aime24:
32 |     runs-on: [L20x8]
33 |     timeout-minutes: 40 # Increase this timeout value as needed
34 |     env:
35 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
36 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
37 |       NO_PROXY: "localhost,127.0.0.1"
38 |       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
39 |     container:
40 |       image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
41 |       options: --gpus all --shm-size=10g
42 |     steps:
43 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
44 |         with:
45 |             fetch-depth: 0
46 |       - name: Install the current repository
47 |         run: |
48 |           pip3 install -e .[test,gpu,math]
49 |       - name: Prepare aime24 dataset
50 |         run: |
51 |           ray stop --force
52 |           python3 recipe/r1/data_process.py --task aime2024
53 |       - name: Running generation and evaluation in AIME 2024
54 |         run: | 
55 |           ray stop --force
56 |           bash tests/e2e/run_r1_distill_qwen_aime24_eval.sh


--------------------------------------------------------------------------------
/.github/workflows/e2e_prime.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_prime
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - v0.*
11 |     paths:
12 |       - "**/*.py"
13 |       # Home
14 |       - "recipe/prime"
15 |       # Entrypoints
16 |       - ".github/workflows/e2e_prime.yml"
17 |       - "examples/data_preprocess/gsm8k.py"
18 |       - "tests/e2e/run_prime.sh"
19 |       - "!examples"
20 |       - "!verl/trainer/main_*.py"
21 |       - "!verl/trainer/fsdp_sft_trainer.py"
22 |       # Megatron
23 |       - "!verl/workers/**/megatron_*.py"
24 | 
25 | # Declare permissions just read content.
26 | permissions:
27 |   contents: read
28 | 
29 | jobs:
30 |   e2e_prime:
31 |     runs-on: [L20x8]
32 |     timeout-minutes: 40 # Increase this timeout value as needed
33 |     env:
34 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
35 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
36 |       NO_PROXY: "localhost,127.0.0.1"
37 |       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
38 |     container:
39 |       image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
40 |       options: --gpus all --shm-size=10g
41 |     steps:
42 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
43 |         with:
44 |             fetch-depth: 0
45 |       - name: Install the current repository
46 |         run: |
47 |           pip3 install -e .[test,gpu]
48 |       - name: Prepare gsm8k dataset
49 |         run: |
50 |           ray stop --force
51 |           python3 examples/data_preprocess/gsm8k.py
52 |       - name: Running GSM8K E2E with prime alg
53 |         run: | 
54 |           ray stop --force
55 |           bash tests/e2e/run_prime.sh


--------------------------------------------------------------------------------
/.github/workflows/pre-commit-full.yml:
--------------------------------------------------------------------------------
 1 | # TODO: Remove this once all the files are fixed, i.e., `pre-commit run --all-files` passes
 2 | name: pre-commit-full
 3 | 
 4 | # Run weekly on Sunday at 00:00 UTC
 5 | on:
 6 |   schedule:
 7 |     - cron: '0 0 * * 0'
 8 |   # Allow manual triggering
 9 |   workflow_dispatch:
10 | 
11 | # Declare permissions just read content.
12 | permissions: 
13 |   contents: read
14 | 
15 | jobs:
16 |   pre-commit-full:
17 |     runs-on: ubuntu-latest
18 |     strategy:
19 |       matrix:
20 |         python-version: ["3.12"]
21 |     steps:
22 |     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Set ruff --output-format=github
28 |       run: |
29 |         sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
30 |         git add .pre-commit-config.yaml
31 |     - uses: pre-commit/action@v3.0.1


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | # c.f. https://github.com/pre-commit/action?tab=readme-ov-file#using-this-action
 2 | name: pre-commit
 3 | 
 4 | # No need to avoid / cancel lightweight pre-commit jobs
 5 | on:
 6 |   pull_request:
 7 |   push:
 8 |     branches:
 9 |       - main
10 |       - v0.2.x
11 | 
12 | # Declare permissions just read content.
13 | permissions: 
14 |   contents: read
15 | 
16 | jobs:
17 |   pre-commit:
18 |     runs-on: ubuntu-latest
19 |     strategy:
20 |       matrix:
21 |         python-version: ["3.12"]
22 |     steps:
23 |     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
24 |     - name: Set up Python ${{ matrix.python-version }}
25 |       uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
26 |       with:
27 |         python-version: ${{ matrix.python-version }}
28 |     - name: Set ruff --output-format=github
29 |       run: |
30 |         sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
31 |         git add .pre-commit-config.yaml
32 |     - uses: pre-commit/action@v3.0.1
33 |       with:
34 |         extra_args: "" # Overriding "--all-files"
35 |         # TODO: Remove this once all the files are fixed, i.e., `pre-commit run --all-files` passes


--------------------------------------------------------------------------------
/.github/workflows/ray_test.yml:
--------------------------------------------------------------------------------
 1 | name: ray
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - v0.*
11 |     paths:
12 |       - "verl/single_controller/*.py"
13 |       - .github/workflows/ray_test.yml
14 |       - "!recipe/**"
15 | 
16 | # Cancel jobs on the same ref if a new one is triggered
17 | concurrency:
18 |   group: ${{ github.workflow }}-${{ github.ref }}
19 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
20 | 
21 | # Declare permissions just read content.
22 | permissions: 
23 |   contents: read
24 | 
25 | jobs:
26 |   ray:
27 |     runs-on: [L20x8]
28 |     timeout-minutes: 10 # Increase this timeout value as needed
29 |     env:
30 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
31 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
32 |       NO_PROXY: "localhost,127.0.0.1"
33 |       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
34 |     container:
35 |       image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
36 |       options: --gpus all --shm-size=10g
37 |     steps:
38 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
39 |         with:
40 |             fetch-depth: 0
41 |       - name: Install the current repository
42 |         run: |
43 |           pip install -e .[test]
44 |           pip install --upgrade "ray>=2.40.0"
45 |       - name: Running ray tests that need 8 GPUs
46 |         run: |
47 |           cd tests/ray
48 |           pytest -s -x --ignore=test_check_worker_alive.py --ignore=test_rvdz.py .
49 | 


--------------------------------------------------------------------------------
/.github/workflows/sandbox.yml:
--------------------------------------------------------------------------------
 1 | name: sandbox
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - v0.*
11 |     paths:
12 |       - "**/*.py"
13 |       - .github/workflows/sandbox.yml
14 | 
15 | # Cancel jobs on the same ref if a new one is triggered
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.ref }}
18 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
19 | 
20 | # Declare permissions just read content.
21 | permissions: 
22 |   contents: read
23 | 
24 | jobs:
25 |   sandbox:
26 |     runs-on: [L20x8]
27 |     timeout-minutes: 10 # Increase this timeout value as needed
28 |     env:
29 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
30 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
31 |       NO_PROXY: "localhost,127.0.0.1"
32 |       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
33 |     container:
34 |       image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
35 |       options: --gpus all --shm-size=10g
36 |     steps:
37 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
38 |         with:
39 |             fetch-depth: 0
40 |       - name: Install the current repository
41 |         run: |
42 |           pip3 install -e .[test,prime]
43 |           pip3 install vllm==0.5.4
44 |       - name: Running sandbox tests on 8 L20 GPUs
45 |         run: |
46 |           cd tests/sandbox
47 |           pytest -s -x .
48 | 


--------------------------------------------------------------------------------
/.github/workflows/sanity.yml:
--------------------------------------------------------------------------------
 1 | name: sanity
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - v0.*
11 |     paths:
12 |       - "**/*.py"
13 |       - .github/workflows/sanity.yml
14 | 
15 | # Cancel jobs on the same ref if a new one is triggered
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.ref }}
18 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
19 | 
20 | # Declare permissions just read content.
21 | permissions: 
22 |   contents: read
23 | 
24 | jobs:
25 |   sanity:
26 |     runs-on: ubuntu-latest
27 |     timeout-minutes: 5 # Increase this timeout value as needed
28 |     strategy:
29 |       matrix:
30 |         python-version: ["3.10"]
31 |     steps:
32 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
33 |       - name: Set up Python ${{ matrix.python-version }}
34 |         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
35 |         with:
36 |           python-version: ${{ matrix.python-version }}
37 |       - name: Install the current repository
38 |         run: |
39 |           pip install -e .[test]
40 |       - name: Run sanity test
41 |         run: |
42 |           pytest -s -x tests/sanity
43 |       - name: Run utility test
44 |         run: |
45 |           pytest -s -x tests/utility
46 |       - name: Run license test
47 |         run: |
48 |           python3 tests/sanity/check_license.py --directory .
49 | 


--------------------------------------------------------------------------------
/.github/workflows/secrets_scan.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |   pull_request:
 6 | 
 7 | permissions:
 8 |   contents: read
 9 | 
10 | jobs:
11 |   test:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - name: Checkout code
15 |       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
16 |       with:
17 |         fetch-depth: 0
18 |     - name: Secret Scanning
19 |       uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14
20 |       with:
21 |         extra_args: --results=verified,unknown


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/astral-sh/ruff-pre-commit
3 |     rev: "v0.11.4"
4 |     hooks:
5 |       - id: ruff
6 |         args: ["--fix", "--show-fixes", "--output-format=full"]
7 |       - id: ruff-format


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | build:
 7 |   os: ubuntu-22.04
 8 |   tools:
 9 |     python: "3.11"
10 |     rust: "1.70"
11 | 
12 | sphinx:
13 |   configuration: docs/conf.py
14 | 
15 | python:
16 |   install:
17 |     - requirements: docs/requirements-docs.txt
18 |     - method: pip
19 |       path: .
20 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "[python]": {
3 |         "editor.defaultFormatter": "charliermarsh.ruff",
4 |         "editor.codeActionsOnSave": {
5 |             "source.organizeImports": "always",
6 |         }
7 |     }
8 | }


--------------------------------------------------------------------------------
/Notice.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates 


--------------------------------------------------------------------------------
/docker/Apptainerfile.rocm:
--------------------------------------------------------------------------------
 1 | Bootstrap: docker
 2 | 
 3 | # Support - Traing: fsdp; Inference: vllm
 4 | # FROM: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
 5 | # Support - Traing: fsdp; Inference: vllm, sglang
 6 | FROM lmsysorg/sglang:v0.4.5-rocm630
 7 | 
 8 | %environment
 9 |     export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
10 | 
11 |     export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
12 |     export CFLAGS="-D__HIP_PLATFORM_AMD__"
13 |     export CXXFLAGS="-D__HIP_PLATFORM_AMD__"
14 | 
15 | %post
16 |     # Create source directory
17 |     mkdir -p /opt/src
18 | 
19 |     # Uninstall and reinstall vllm
20 |     pip uninstall -y vllm
21 |     cd /opt/src
22 |     git clone -b v0.6.3 https://github.com/vllm-project/vllm.git
23 |     cd vllm
24 |     MAX_JOBS=$(nproc) python3 setup.py install
25 |     cd /opt
26 |     rm -rf /opt/src/vllm
27 | 
28 |     # Install dependencies
29 |     pip install "tensordict<0.6" --no-deps
30 |     pip install accelerate \
31 |         codetiming \
32 |         datasets \
33 |         dill \
34 |         hydra-core \
35 |         liger-kernel \
36 |         numpy \
37 |         pandas \
38 |         peft \
39 |         "pyarrow>=15.0.0" \
40 |         pylatexenc \
41 |         "ray[data,train,tune,serve]" \
42 |         torchdata \
43 |         transformers \
44 |         wandb \
45 |         orjson \
46 |         pybind11
47 | 
48 |     # Clone and install verl from GitHub
49 |     cd /opt
50 |     git clone https://github.com/volcengine/verl.git
51 |     cd verl
52 |     # Uncomment to use a specific version
53 |     # git checkout v0.3.0.post0
54 |     pip install -e . --no-deps
55 | 
56 |     # Install torch_memory_saver
57 |     pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps


--------------------------------------------------------------------------------
/docker/Dockerfile.megatron:
--------------------------------------------------------------------------------
 1 | FROM hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
 2 | 
 3 | # Define environments
 4 | ENV MAX_JOBS=64
 5 | 
 6 | RUN apt-get update && \
 7 |     apt-get install -y aria2
 8 | 
 9 | # 1. Reinstall CUDA 12.4
10 | RUN aria2c https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
11 |     mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
12 | 
13 | RUN aria2c --always-resume=true --max-tries=99999 https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
14 | 
15 | RUN dpkg -i cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
16 | 
17 | RUN cp /var/cuda-repo-ubuntu2204-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/
18 | 
19 | RUN apt-get update
20 | 
21 | RUN apt-get -y install cuda-toolkit-12-4
22 | 
23 | RUN rm cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
24 | 
25 | RUN update-alternatives --set cuda /usr/local/cuda-12.4
26 | 
27 | # 2. Reinstall Flash attn 2.7.3
28 | RUN pip uninstall -y flash-attn && \
29 |     wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
30 |     pip install --no-cache-dir flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
31 |     rm flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl 
32 | 
33 | # 3. Install Apex
34 | RUN git clone https://github.com/NVIDIA/apex.git && \
35 |     cd apex && \
36 |     pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
37 | 
38 | # 4. Install TransformerEngine
39 | RUN export NVTE_FRAMEWORK=pytorch && pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.0
40 | 
41 | # 5. Install Megatron-LM
42 | RUN pip3 install git+https://github.com/NVIDIA/Megatron-LM.git@v0.11.0


--------------------------------------------------------------------------------
/docker/Dockerfile.ngc.vllm:
--------------------------------------------------------------------------------
 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:ngc-th2.4.0-cu124-vllm0.6.3-ray2.4-te1.7-v0.0.6" -f docker/Dockerfile.ngc.vllm . --builder cloud-verlai-verl-builder --progress=plain --push
 2 | FROM nvcr.io/nvidia/pytorch:24.05-py3
 3 | 
 4 | # uninstall nv-pytorch fork
 5 | RUN pip3 uninstall pytorch-quantization \
 6 |     pytorch-triton \
 7 |     torch \
 8 |     torch-tensorrt \
 9 |     torchvision \
10 |     xgboost transformer_engine flash_attn \
11 |     apex megatron-core -y
12 | 
13 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
14 | 
15 | # =============== Megatron dependencies (optional) =================
16 | # install apex, set MAX_JOBS to avoid OOMs
17 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
18 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
19 |     git+https://github.com/NVIDIA/apex
20 | # =============== End of Megatron dependencies (optional) =================
21 | 
22 | RUN pip3 install --no-cache-dir \
23 |     accelerate \
24 |     codetiming \
25 |     datasets \
26 |     dill \
27 |     hydra-core \
28 |     numpy \
29 |     'pandas' \
30 |     'peft' \
31 |     'pyarrow>=15.0.0' \
32 |     'pybind11' \
33 |     'pylatexenc' \
34 |     'ray>=2.10' \
35 |     'tensordict<0.6' \
36 |     'transformers' \
37 |     'vllm==0.6.3.post1' \
38 |     'wandb'
39 | 
40 | # full dependencies
41 | RUN pip3 install pytest pre-commit py-spy pyext liger-kernel
42 | 
43 | # =============== Megatron dependencies (optional) =================
44 | # install Transformer Engine, which requires FA 2.5.8. Do it in a separate step for docker cache
45 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation
46 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
47 | # =============== End of Megatron dependencies (optional) =================
48 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.ngc.vllm0.8.sagemaker:
--------------------------------------------------------------------------------
 1 | # Using a pre-built image from AWS DLC which contains the current version of python (3.10) and supported cuda version (12.1)
 2 | FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:2.1.0-transformers4.36.0-gpu-py310-cu121-ubuntu20.04
 3 | 
 4 | # uninstall nv-pytorch fork
 5 | RUN pip3 uninstall -y pytorch-quantization \
 6 |     pytorch-triton torch torch-tensorrt torchvision \
 7 |     xgboost transformer_engine flash_attn apex megatron-core
 8 | 
 9 | # Define environments
10 | ENV MAX_JOBS=32
11 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
12 | ENV DEBIAN_FRONTEND=noninteractive
13 | ENV NODE_OPTIONS=""
14 | ENV HF_HUB_ENABLE_HF_TRANSFER="1"
15 | 
16 | # Install systemctl
17 | RUN apt-get update && \
18 |     apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
19 |     apt-get clean
20 | 
21 | # Install tini
22 | RUN apt-get update && \
23 |     apt-get install -y tini && \
24 |     apt-get clean
25 | 
26 | # Install torch-2.6.0 + vllm-0.8.2
27 | RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata==0.11.0 \
28 |     transformers>=4.49.0 accelerate datasets peft hf-transfer \
29 |     ray[default] codetiming hydra-core pandas pyarrow>=15.0.0 pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \
30 |     pytest pre-commit py-spy pyext pre-commit ruff
31 | 
32 | # Install flash_attn-2.7.4.post1
33 | RUN pip uninstall -y transformer-engine flash-attn && \
34 |     pip install flash-attn==2.7.4.post1 --no-build-isolation
35 | 
36 | # Fix cv2
37 | RUN pip uninstall -y pynvml nvidia-ml-py && \
38 |     pip install --no-cache-dir nvidia-ml-py>=12.560.30 opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
39 |     pip install --no-cache-dir --upgrade optree>=0.13.0
40 | 
41 | # Install verl
42 | RUN pip install --no-cache-dir verl[vllm] -U
43 | 
44 | # Reset pip config
45 | RUN pip config unset global.index-url && \
46 |     pip config unset global.extra-index-url
47 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.rocm:
--------------------------------------------------------------------------------
 1 | #  Build the docker in the repo dir:
 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 .
 3 | # docker images # you can find your built docker
 4 | 
 5 | 
 6 | # Support - Traing: fsdp; Inference: vllm
 7 | # FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
 8 | # Support - Traing: fsdp; Inference: vllm, sglang
 9 | FROM lmsysorg/sglang:v0.4.5-rocm630
10 | 
11 | # Set working directory
12 | # WORKDIR $PWD/app
13 | 
14 | # Set environment variables
15 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
16 | 
17 | ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
18 | ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
19 | ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
20 | 
21 | # Install vllm
22 | RUN pip uninstall -y vllm && \
23 |     rm -rf vllm && \
24 |     git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \
25 |     cd vllm && \
26 |     MAX_JOBS=$(nproc) python3 setup.py install && \
27 |     cd .. && \
28 |     rm -rf vllm
29 | 
30 | # Copy the entire project directory
31 | COPY . .
32 | 
33 | # Install dependencies
34 | RUN pip install "tensordict<0.6" --no-deps && \
35 |     pip install accelerate \
36 |     codetiming \
37 |     datasets \
38 |     dill \
39 |     hydra-core \
40 |     liger-kernel \
41 |     numpy \
42 |     pandas \
43 |     peft \
44 |     "pyarrow>=15.0.0" \
45 |     pylatexenc \
46 |     "ray[data,train,tune,serve]" \
47 |     torchdata \
48 |     transformers \
49 |     wandb \
50 |     orjson \
51 |     pybind11 && \
52 |     pip install -e . --no-deps
53 | 
54 | # Install torch_memory_saver
55 | RUN pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps


--------------------------------------------------------------------------------
/docker/Dockerfile.vemlp.vllm.te:
--------------------------------------------------------------------------------
 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE .
 2 | 
 3 | # the one in docker.io is an alias for the one veturbo
 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124
 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base
 6 | 
 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed
 8 | # unset for now
 9 | RUN pip3 config unset global.index-url
10 | 
11 | # transformers 4.47.0 contains the following bug:
12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask'
13 | RUN pip3 install --no-cache-dir \
14 |     torch==2.4.0 \
15 |     accelerate \
16 |     codetiming \
17 |     dill \
18 |     hydra-core \
19 |     numpy \
20 |     pybind11 \
21 |     tensordict \
22 |     "transformers <= 4.46.0"
23 | 
24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation
25 | 
26 | # vllm depends on ray, and veRL does not support ray > 2.37
27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10
28 | 
29 | # install apex
30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
31 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
32 |     git+https://github.com/NVIDIA/apex
33 | 
34 | # install Transformer Engine
35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req
36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM
37 | # - cudnn is required by TransformerEngine
38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \
39 | #     pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation
41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7
42 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = verl
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # verl documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d _build/html/
18 | ```
19 | Launch your browser and navigate to http://localhost:8000 to view the documentation.


--------------------------------------------------------------------------------
/docs/README_vllm0.8.md:
--------------------------------------------------------------------------------
 1 | # Upgrading to vLLM >= 0.8
 2 | 
 3 | ## Installation
 4 | 
 5 | Note: This version of veRL+vLLM 0.8+ supports **FSDP** for training and **vLLM** for rollout.
 6 | 
 7 | ```bash
 8 | # Create the conda environment
 9 | conda create -n verl python==3.10
10 | conda activate verl
11 | 
12 | # Install verl
13 | git clone https://github.com/volcengine/verl.git
14 | cd verl
15 | pip3 install -e .
16 | 
17 | # Install the latest stable version of vLLM
18 | pip3 install vllm==0.8.3
19 | 
20 | # Install flash-attn
21 | pip3 install flash-attn --no-build-isolation
22 | 
23 | ```
24 | 
25 | We have a pre-built docker image for veRL+vLLM 0.8.3. You can direct import it with the following command:
26 | 
27 | ```bash
28 | docker pull hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0
29 | ```
30 | 
31 | ## Features
32 | 
33 | vLLM 0.8+ supports cuda graph and V1 engine by default in veRL. To enable these features, remember to add the following lines to the bash script:
34 | 
35 | ```bash
36 | actor_rollout_ref.rollout.enforce_eager=False \
37 | actor_rollout_ref.rollout.free_cache_engine=False \
38 | ```
39 | 
40 | and also **remove** the environment variable if it exists:
41 | 
42 | ```bash
43 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
44 | # export VLLM_ATTENTION_BACKEND=XFORMERS
45 | ```
46 | 
47 | ## Notes
48 | 
49 | When you just directly upgrade vllm>=0.8, some dependency packages may undergo version changes. If you encounter the following problems:
50 | 
51 | ```bash
52 | in <module> from torch.multiprocessing.reductions import ForkingPickler ImportError: cannot import name 'ForkingPickler' from 'torch.multiprocessing.reductions' (/opt/conda/lib/python3.11/site-packages/torch/multiprocessing/reductions.py)
53 | ```
54 | 
55 | You need to upgrade `tensordict` to version 0.6.2 using the command `pip install tensordict==0.6.2`.
56 | 


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/advance/megatron_extension.rst:
--------------------------------------------------------------------------------
 1 | Add models with the Megatron-LM backend
 2 | =========================================
 3 | 
 4 | Model
 5 | -----------
 6 | 
 7 | The most challenging aspect to use the Megatron-LM backend is implementing
 8 | the models for training. Currently, we implement Llama model that
 9 | support data parallelism, tensor parallelism, pipeline parallelism (also
10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) on Llama
11 | model, which can be found in `modeling_llama_megatron.py <https://github.com/volcengine/verl/blob/main/verl/models/llama/megatron/modeling_llama_megatron.py>`_.
12 | 
13 | To support other model, users are required to implement:
14 | 
15 | 1. Implemnt a model similar to ``modeling_llama_megatron.py`` that satisfy the
16 |    parallelism requirements of Megatron-LM. Then register your model in
17 |    the `registry.py <https://github.com/volcengine/verl/blob/main/verl/models/registry.py>`_.
18 | 2. Checkpoint utils that can load full checkpoint (e.g. huggingface
19 |    checkpoint) to partitioned models during the runtime. Then register
20 |    your loader to ``weight_loader_registry`` in `weight_loader_registry.py <https://github.com/volcengine/verl/blob/main/verl/models/weight_loader_registry.py>`_.
21 | 3. Weight loader that synchronize the weight from Megatron to rollout
22 |    (vLLM) model. Note that both the actor model and rollout model are
23 |    partitioned during runtime. So, it's advisable to map the model name
24 |    in actor model implementation. Otherwise, you may need an additional
25 |    name mapping and even weight transformation. The weight loader implementation
26 |    is in `megatron_weight_loaders.py <https://github.com/volcengine/verl/blob/main/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py>`_.


--------------------------------------------------------------------------------
/docs/advance/placement.rst:
--------------------------------------------------------------------------------
 1 | Ray API Design Tutorial
 2 | =======================================
 3 | 
 4 | We provide a tutorial for our Ray API design, including:
 5 | 
 6 | - Ray basic concepts
 7 | - Resource Pool and RayWorkerGroup
 8 | - Data Dispatch, Execution and Collection
 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool
10 | 
11 | See details in `tutorial.ipynb <https://github.com/volcengine/verl/blob/main/examples/ray/tutorial.ipynb>`_.


--------------------------------------------------------------------------------
/docs/bar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/bar.png


--------------------------------------------------------------------------------
/docs/fig1_sc2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/fig1_sc2.png


--------------------------------------------------------------------------------
/docs/fig1_screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/fig1_screenshot.png


--------------------------------------------------------------------------------
/docs/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/fig2.png


--------------------------------------------------------------------------------
/docs/logo-deepeyes.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/docs/logo-deepeyes.jpg


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
 1 | # markdown suport
 2 | recommonmark
 3 | # markdown table suport
 4 | sphinx-markdown-tables
 5 | 
 6 | # theme default rtd
 7 | 
 8 | # crate-docs-theme
 9 | sphinx-rtd-theme
10 | 
11 | # pin tokenizers version to avoid env_logger version req
12 | tokenizers==0.19.1
13 | 


--------------------------------------------------------------------------------
/eval/EVALUATION.md:
--------------------------------------------------------------------------------
 1 | # Evaluation for DeepEyes
 2 | 
 3 | We provide a evaluation demo for assess your model on V* benchmark with the bbox processing. 
 4 | 
 5 | ## Evaluating Model
 6 | You can use the `eval_vstar.py` to evalate the model with the auto bbox processing. It is worth noting that we firstly deploy model using VLLM. If you want to use transformers to implement your model, you should modify the code and the evaluation process will be slow.
 7 | 
 8 | Here is a sample of the evaluation command：
 9 | ```
10 | python eval_vstar.py \
11 |     --model_name MODEL_NAME \
12 |     --api_key API_KEY \
13 |     --api_url API_URL\
14 |     --vstar_bench_path PATH_TO_VSTAR \
15 |     --save_path PATH_TO_SAVE_DIR \
16 |     --eval_model_name MODEL_NAME_VLLM \
17 |     --num_workers NUM_WORKERS
18 | ```
19 | `MODEL_NAME` is the name of saving, and the evaluation results will be saved at `PATH_TO_SAVE_DIR/MODEL_NAME`. `MODEL_NAME_VLLM` is the name of VLLM server. you can set `MODEL_NAME_VLLM` as None, and will be detected automatically. `API_URL` is the VLLM server port, such as 'http://10.39.19.140:8000/v1'.
20 | 
21 | 
22 | ## Score Calculate
23 | We use the combination of ruled-based evaluation and llm-judge assessment to calculate score. You can use the following command to calculate your results:
24 | 
25 | ```
26 | python judge_result.py \
27 |     --model_name MODEL_NAME \
28 |     --api_key API_KEY \
29 |     --api_url API_URL\
30 |     --vstar_bench_path PATH_TO_VSTAR \
31 |     --save_path PATH_TO_SAVE_DIR \
32 |     --eval_model_name MODEL_NAME_VLLM \
33 |     --num_workers NUM_WORKERS
34 | ```
35 | We use Qwen2.5 72B deployed by VLLM as judge model, so `API_URL` is the address of judge model VLLM server. 
36 | 
37 | 
38 | ## Visualization
39 | We also provide the code `watch_demo.ipynb` to visualize the result. You should modify the `root_path` to the V* bench path and `json_path` to the result jsonl path. Desides, you can modify `line_id` or `tosee_img` to change the case to be visualized.  
40 | 
41 | ## Evaluate HRBench
42 | The evaluation of HRBench is similar to that of V*.
43 | 


--------------------------------------------------------------------------------
/examples/generation/run_deepseek7b_mutli_node.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet
 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet
 5 | model_path=deepseek-ai/deepseek-llm-7b-chat
 6 | 
 7 | python3 -m verl.trainer.main_generation \
 8 |     trainer.nnodes=2 \
 9 |     trainer.n_gpus_per_node=8 \
10 |     data.path=$data_path \
11 |     data.prompt_key=prompt \
12 |     data.n_samples=1 \
13 |     data.output_path=$save_path \
14 |     model.path=$model_path\
15 |     +model.trust_remote_code=True \
16 |     rollout.temperature=1.0 \
17 |     rollout.top_k=50 \
18 |     rollout.top_p=0.7 \
19 |     rollout.prompt_length=2048 \
20 |     rollout.response_length=1024 \
21 |     rollout.tensor_model_parallel_size=16 \
22 |     rollout.gpu_memory_utilization=0.8
23 | 


--------------------------------------------------------------------------------
/examples/generation/run_deepseek_v2_lite_math.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet
 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet
 5 | model_path=deepseek-ai/deepseek-llm-7b-chat
 6 | 
 7 | python3 -m verl.trainer.main_generation \
 8 |     trainer.nnodes=1 \
 9 |     trainer.n_gpus_per_node=8 \
10 |     data.path=$data_path \
11 |     data.prompt_key=prompt \
12 |     data.n_samples=1 \
13 |     data.output_path=$save_path \
14 |     model.path=$model_path \
15 |     +model.trust_remote_code=True \
16 |     rollout.temperature=1.0 \
17 |     rollout.top_k=50 \
18 |     rollout.top_p=0.7 \
19 |     rollout.prompt_length=2048 \
20 |     rollout.response_length=1024 \
21 |     rollout.tensor_model_parallel_size=2 \
22 |     rollout.gpu_memory_utilization=0.8
23 | 


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=1024 \
10 |     data.filter_overlong_prompts=True \
11 |     data.truncation='error' \
12 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=True \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \
17 |     actor_rollout_ref.actor.use_kl_loss=True \
18 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
19 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
20 |     actor_rollout_ref.actor.entropy_coeff=0 \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
28 |     actor_rollout_ref.rollout.n=5 \
29 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \
30 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
31 |     algorithm.use_kl_in_reward=False \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console'] \
34 |     trainer.project_name='verl_grpo_example_gsm8k' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo --config-path=config \
 4 |     --config-name='ppo_megatron_trainer.yaml'\
 5 |     algorithm.adv_estimator=grpo \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=1024 \
11 |     data.filter_overlong_prompts=True \
12 |     data.truncation='error' \
13 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
18 |     actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \
19 |     actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \
20 |     actor_rollout_ref.actor.use_kl_loss=True \
21 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
22 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
23 |     actor_rollout_ref.actor.entropy_coeff=0 \
24 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
29 |     actor_rollout_ref.rollout.n=5 \
30 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
31 |     algorithm.use_kl_in_reward=False \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_grpo_example_gsm8k' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm_megatron' \
36 |     trainer.n_gpus_per_node=16 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     data.filter_overlong_prompts=True \
11 |     data.truncation='error' \
12 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=True \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
17 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
18 |     actor_rollout_ref.actor.use_kl_loss=True \
19 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
20 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
21 |     actor_rollout_ref.actor.entropy_coeff=0 \
22 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
23 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
28 |     actor_rollout_ref.rollout.n=5 \
29 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
30 |     algorithm.use_kl_in_reward=False \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['console','wandb'] \
33 |     trainer.project_name='verl_grpo_example_gsm8k' \
34 |     trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \
35 |     trainer.n_gpus_per_node=8 \
36 |     trainer.nnodes=1 \
37 |     trainer.save_freq=-1 \
38 |     trainer.test_freq=5 \
39 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
 4 | # export VLLM_ATTENTION_BACKEND=XFORMERS
 5 | 
 6 | python3 -m verl.trainer.main_ppo \
 7 |     algorithm.adv_estimator=grpo \
 8 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 9 |     data.val_files=$HOME/data/gsm8k/test.parquet \
10 |     data.train_batch_size=1024 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     data.filter_overlong_prompts=True \
14 |     data.truncation='error' \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=True \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
20 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
21 |     actor_rollout_ref.actor.use_kl_loss=True \
22 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
23 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
24 |     actor_rollout_ref.actor.entropy_coeff=0 \
25 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
26 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
31 |     actor_rollout_ref.rollout.n=5 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     algorithm.use_kl_in_reward=False \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_grpo_example_gsm8k' \
37 |     trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \
38 |     trainer.val_before_train=False \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=5 \
43 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=gae \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     data.filter_overlong_prompts=True \
11 |     data.truncation='error' \
12 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=True \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
19 |     actor_rollout_ref.actor.use_kl_loss=False \
20 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
21 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
22 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
23 |     actor_rollout_ref.rollout.name=vllm \
24 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
25 |     critic.optim.lr=1e-5 \
26 |     critic.model.use_remove_padding=True \
27 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
28 |     critic.model.enable_gradient_checkpointing=True \
29 |     critic.ppo_micro_batch_size_per_gpu=32 \
30 |     critic.model.fsdp_config.param_offload=False \
31 |     critic.model.fsdp_config.optimizer_offload=False \
32 |     algorithm.use_kl_in_reward=False \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_example_gsm8k' \
36 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=1 \
41 |     trainer.total_epochs=15 $@
42 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | VERL_USE_MODELSCOPE=True \
 4 | python3 -m verl.trainer.main_ppo \
 5 |     algorithm.adv_estimator=gae \
 6 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 7 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 8 |     data.train_batch_size=1024 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=512 \
11 |     data.filter_overlong_prompts=True \
12 |     data.truncation='error' \
13 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
18 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
19 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
20 |     actor_rollout_ref.actor.use_kl_loss=False \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     critic.optim.lr=1e-5 \
27 |     critic.model.use_remove_padding=True \
28 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
29 |     critic.model.enable_gradient_checkpointing=True \
30 |     critic.ppo_micro_batch_size_per_gpu=32 \
31 |     critic.model.fsdp_config.param_offload=False \
32 |     critic.model.fsdp_config.optimizer_offload=False \
33 |     algorithm.use_kl_in_reward=False \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_example_gsm8k' \
37 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
38 |     trainer.n_gpus_per_node=8 \
39 |     trainer.nnodes=1 \
40 |     trainer.save_freq=-1 \
41 |     trainer.test_freq=1 \
42 |     trainer.total_epochs=15 $@
43 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet
 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use
 5 | 
 6 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
 7 |     algorithm.adv_estimator=gae \
 8 |     data.train_files="$train_files" \
 9 |     data.val_files="$test_files" \
10 |     data.train_batch_size=512 \
11 |     data.max_prompt_length=128 \
12 |     data.max_response_length=128 \
13 |     data.filter_overlong_prompts=True \
14 |     data.truncation='error' \
15 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
18 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
19 |     actor_rollout_ref.actor.use_kl_loss=False \
20 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
21 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
22 |     actor_rollout_ref.rollout.name=vllm \
23 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
24 |     critic.optim.lr=1e-5 \
25 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
26 |     critic.model.enable_gradient_checkpointing=False \
27 |     critic.ppo_micro_batch_size_per_gpu=4 \
28 |     reward_model.enable=True \
29 |     reward_model.megatron.tensor_model_parallel_size=4 \
30 |     reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \
31 |     reward_model.micro_batch_size_per_gpu=4 \
32 |     reward_model.param_offload=False \
33 |     algorithm.use_kl_in_reward=False \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_megatron_full_hh_rlhf_examples' \
37 |     trainer.experiment_name='deepseek_llm_7b_model_rm' \
38 |     trainer.n_gpus_per_node=8 \
39 |     trainer.nnodes=1 \
40 |     trainer.save_freq=-1 \
41 |     trainer.test_freq=5 \
42 |     trainer.total_epochs=100 $@
43 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
12 |     algorithm.adv_estimator=gae \
13 |     data.train_files="$train_files" \
14 |     data.val_files="$test_files" \
15 |     data.train_batch_size=1024 \
16 |     data.max_prompt_length=1024 \
17 |     data.max_response_length=512 \
18 |     data.filter_overlong_prompts=True \
19 |     data.truncation='error' \
20 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
21 |     actor_rollout_ref.actor.optim.lr=1e-6 \
22 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
23 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
24 |     actor_rollout_ref.actor.use_kl_loss=False \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
29 |     critic.optim.lr=1e-5 \
30 |     critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     algorithm.use_kl_in_reward=False \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_megatron_math_gsm8k_examples' \
37 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
38 |     trainer.n_gpus_per_node=8 \
39 |     trainer.nnodes=1 \
40 |     trainer.save_freq=-1 \
41 |     trainer.test_freq=5 \
42 |     trainer.total_epochs=100 $@
43 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_gemma.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=gae \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=512 \
 8 |     data.max_prompt_length=1024 \
 9 |     data.max_response_length=512 \
10 |     data.filter_overlong_prompts=True \
11 |     data.truncation='error' \
12 |     actor_rollout_ref.model.path=google/gemma-2-2b-it \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.model.use_remove_padding=False \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
19 |     actor_rollout_ref.actor.use_kl_loss=False \
20 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
21 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
22 |     actor_rollout_ref.rollout.name=vllm \
23 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
24 |     critic.optim.lr=1e-5 \
25 |     critic.model.use_remove_padding=False \
26 |     critic.model.path=google/gemma-2-2b-it \
27 |     critic.model.enable_gradient_checkpointing=False \
28 |     critic.ppo_micro_batch_size_per_gpu=4 \
29 |     critic.model.fsdp_config.param_offload=False \
30 |     critic.model.fsdp_config.optimizer_offload=False \
31 |     algorithm.use_kl_in_reward=False \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_example' \
35 |     trainer.experiment_name='gemma2b_function_rm' \
36 |     trainer.n_gpus_per_node=2 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=10 \
40 |     trainer.total_epochs=15 $@
41 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
 4 | # export VLLM_ATTENTION_BACKEND=XFORMERS
 5 | 
 6 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 7 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 8 | math_train_path=$HOME/data/math/train.parquet
 9 | math_test_path=$HOME/data/math/test.parquet
10 | 
11 | train_files="['$gsm8k_train_path', '$math_train_path']"
12 | test_files="['$gsm8k_test_path', '$math_test_path']"
13 | 
14 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
15 |     algorithm.adv_estimator=gae \
16 |     data.train_files="$train_files" \
17 |     data.val_files="$test_files" \
18 |     data.train_batch_size=1024 \
19 |     data.max_prompt_length=1024 \
20 |     data.max_response_length=512 \
21 |     data.filter_overlong_prompts=True \
22 |     data.truncation='error' \
23 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
24 |     actor_rollout_ref.actor.optim.lr=1e-6 \
25 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
26 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
27 |     actor_rollout_ref.actor.use_kl_loss=False \
28 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
29 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
30 |     actor_rollout_ref.rollout.name=vllm \
31 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
32 |     critic.optim.lr=1e-5 \
33 |     critic.model.path=Qwen/Qwen2-7B-Instruct \
34 |     critic.model.enable_gradient_checkpointing=False \
35 |     critic.ppo_micro_batch_size_per_gpu=4 \
36 |     algorithm.use_kl_in_reward=False \
37 |     trainer.critic_warmup=0 \
38 |     trainer.logger=['console','wandb'] \
39 |     trainer.project_name='verl_megatron_math_gsm8k_examples' \
40 |     trainer.experiment_name='qwen2_7b_function_rm' \
41 |     trainer.n_gpus_per_node=8 \
42 |     trainer.nnodes=1 \
43 |     trainer.save_freq=-1 \
44 |     trainer.test_freq=5 \
45 |     trainer.total_epochs=100 $@
46 | 


--------------------------------------------------------------------------------
/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export HF_DATASETS_OFFLINE=1
 4 | export TRANSFORMERS_OFFLINE=1
 5 | 
 6 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
 7 | # export VLLM_ATTENTION_BACKEND=XFORMERS
 8 | 
 9 | python3 -m verl.trainer.main_ppo \
10 |     algorithm.adv_estimator=remax \
11 |     data.train_files=$HOME/data/gsm8k/train.parquet \
12 |     data.val_files=$HOME/data/gsm8k/test.parquet \
13 |     data.train_batch_size=512 \
14 |     data.max_prompt_length=512 \
15 |     data.max_response_length=1024 \
16 |     data.filter_overlong_prompts=True \
17 |     data.truncation='error' \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.model.use_remove_padding=True \
21 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
22 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
23 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=30000 \
24 |     actor_rollout_ref.actor.use_kl_loss=False \
25 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
26 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
31 |     actor_rollout_ref.rollout.n=4 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     algorithm.use_kl_in_reward=True \
34 |     algorithm.kl_penalty=kl \
35 |     algorithm.kl_ctrl.kl_coef=0.001 \
36 |     trainer.critic_warmup=0 \
37 |     trainer.logger=['console','wandb'] \
38 |     trainer.project_name='verl_remax_example_gsm8k' \
39 |     trainer.experiment_name='qwen2.5_3b_function_rm_kl1e-3' \
40 |     trainer.val_before_train=False \
41 |     trainer.n_gpus_per_node=8 \
42 |     trainer.nnodes=1 \
43 |     trainer.save_freq=-1 \
44 |     trainer.test_freq=5 \
45 |     trainer.total_epochs=5 $@
46 | 


--------------------------------------------------------------------------------
/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | export HF_DATASETS_OFFLINE=1
 4 | export TRANSFORMERS_OFFLINE=1
 5 | 
 6 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
 7 | # export VLLM_ATTENTION_BACKEND=XFORMERS
 8 | 
 9 | python3 -m verl.trainer.main_ppo \
10 |     algorithm.adv_estimator=remax \
11 |     data.train_files=$HOME/data/gsm8k/train.parquet \
12 |     data.val_files=$HOME/data/gsm8k/test.parquet \
13 |     data.train_batch_size=1024 \
14 |     data.max_prompt_length=512 \
15 |     data.max_response_length=1024 \
16 |     data.filter_overlong_prompts=True \
17 |     data.truncation='error' \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.model.use_remove_padding=True \
21 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
22 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
23 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
24 |     actor_rollout_ref.actor.use_kl_loss=False \
25 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
26 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
31 |     actor_rollout_ref.rollout.n=4 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     algorithm.use_kl_in_reward=True \
34 |     algorithm.kl_penalty=kl \
35 |     algorithm.kl_ctrl.kl_coef=0.001 \
36 |     trainer.critic_warmup=0 \
37 |     trainer.logger=['console','wandb'] \
38 |     trainer.project_name='verl_remax_example_gsm8k' \
39 |     trainer.experiment_name='qwen2.5_7b_function_rm_kl1e-3' \
40 |     trainer.val_before_train=False \
41 |     trainer.n_gpus_per_node=8 \
42 |     trainer.nnodes=1 \
43 |     trainer.save_freq=-1 \
44 |     trainer.test_freq=5 \
45 |     trainer.total_epochs=10 $@
46 | 


--------------------------------------------------------------------------------
/examples/rloo_trainer/run_qwen2-7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
 4 | # export VLLM_ATTENTION_BACKEND=XFORMERS
 5 | 
 6 | python3 -m verl.trainer.main_ppo \
 7 |     algorithm.adv_estimator=rloo \
 8 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 9 |     data.val_files=$HOME/data/gsm8k/test.parquet \
10 |     data.train_batch_size=1024 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     data.filter_overlong_prompts=True \
14 |     data.truncation='error' \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=True \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \
20 |     actor_rollout_ref.actor.use_kl_loss=False \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
27 |     actor_rollout_ref.rollout.name=vllm \
28 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
29 |     actor_rollout_ref.rollout.n=5 \
30 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \
31 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
32 |     algorithm.use_kl_in_reward=True \
33 |     algorithm.kl_penalty=kl \
34 |     algorithm.kl_ctrl.kl_coef=0.001 \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console','wandb'] \
37 |     trainer.project_name='verl_rloo_example_gsm8k' \
38 |     trainer.experiment_name='qwen2_7b_function_rm' \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=5 \
43 |     trainer.total_epochs=15 $@


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_deepseek_6b7.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=$HOME/data/gsm8k/train.parquet \
17 |     data.val_files=$HOME/data/gsm8k/test.parquet \
18 |     data.prompt_key=extra_info \
19 |     data.response_key=extra_info \
20 |     data.prompt_dict_keys=['question'] \
21 |     +data.response_dict_keys=['answer'] \
22 |     data.micro_batch_size_per_gpu=4 \
23 |     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
24 |     trainer.default_local_dir=$save_path \
25 |     trainer.project_name=gsm8k-sft \
26 |     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
27 |     trainer.total_epochs=4 \
28 |     trainer.logger=['console','wandb'] \
29 |     trainer.default_hdfs_dir=null $@


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_2b.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so $@ refers to the rest
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
17 |      -m verl.trainer.fsdp_sft_trainer \
18 |     data.train_files=$HOME/data/gsm8k/train.parquet \
19 |     data.val_files=$HOME/data/gsm8k/test.parquet \
20 |     data.prompt_key=extra_info \
21 |     data.response_key=extra_info \
22 |     data.prompt_dict_keys=['question'] \
23 |     +data.response_dict_keys=['answer'] \
24 |     data.micro_batch_size_per_gpu=4 \
25 |     model.partial_pretrain=google/gemma-2b-it \
26 |     trainer.default_local_dir=$save_path \
27 |     trainer.project_name=gsm8k-sft \
28 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
29 |     trainer.total_epochs=2 \
30 |     trainer.logger=['console','wandb'] \
31 |     trainer.default_hdfs_dir=null $@


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=$HOME/data/gsm8k/train.parquet \
17 |     data.val_files=$HOME/data/gsm8k/test.parquet \
18 |     data.prompt_key=prompt \
19 |     data.response_key=answer \
20 |     data.micro_batch_size_per_gpu=4 \
21 |     model.partial_pretrain=google/gemma-1.1-7b-it \
22 |     trainer.default_local_dir=$save_path \
23 |     trainer.project_name=gsm8k-sft \
24 |     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
25 |     trainer.total_epochs=4 \
26 |     trainer.logger=['console','wandb'] \
27 |     trainer.default_hdfs_dir=null $@


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_peft.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so $@ refers to the rest
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
17 |      -m verl.trainer.fsdp_sft_trainer \
18 |     data.train_files=$HOME/data/gsm8k/train.parquet \
19 |     data.val_files=$HOME/data/gsm8k/test.parquet \
20 |     data.prompt_key=extra_info \
21 |     data.response_key=extra_info \
22 |     optim.lr=1e-4 \
23 |     data.prompt_dict_keys=['question'] \
24 |     +data.response_dict_keys=['answer'] \
25 |     data.micro_batch_size_per_gpu=4 \
26 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
27 |     trainer.default_local_dir=$save_path \
28 |     trainer.project_name=gsm8k-sft \
29 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \
30 |     trainer.logger=['console'] \
31 |     trainer.total_epochs=1 \
32 |     trainer.default_hdfs_dir=null $@ \
33 |     model.lora_rank=32\
34 |     model.lora_alpha=16 \
35 |     model.target_modules=all-linear
36 | 
37 |     # Or you can do this:
38 |     # model.target_modules=[q_proj,v_proj] \
39 | 


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_sp2.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=$HOME/data/gsm8k/train.parquet \
17 |     data.val_files=$HOME/data/gsm8k/test.parquet \
18 |     data.prompt_key=extra_info \
19 |     data.response_key=extra_info \
20 |     optim.lr=1e-4 \
21 |     data.prompt_dict_keys=['question'] \
22 |     +data.response_dict_keys=['answer'] \
23 |     data.micro_batch_size=4 \
24 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
25 |     trainer.default_local_dir=$save_path \
26 |     trainer.project_name=gsm8k-sft \
27 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \
28 |     trainer.logger=['console'] \
29 |     trainer.total_training_steps=1 \
30 |     trainer.default_hdfs_dir=null $@ \
31 |     ulysses_sequence_parallel_size=2 \
32 |     use_remove_padding=true
33 | 


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=$HOME/data/gsm8k/train.parquet \
17 |     data.val_files=$HOME/data/gsm8k/test.parquet \
18 |     data.prompt_key=extra_info \
19 |     data.response_key=extra_info \
20 |     optim.lr=1e-4 \
21 |     data.prompt_dict_keys=['question'] \
22 |     +data.response_dict_keys=['answer'] \
23 |     data.micro_batch_size=4 \
24 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
25 |     model.use_liger=True \
26 |     trainer.default_local_dir=$save_path \
27 |     trainer.project_name=gsm8k-sft \
28 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \
29 |     trainer.logger=['console'] \
30 |     trainer.default_hdfs_dir=null $@ \
31 |     ulysses_sequence_parallel_size=2 \
32 |     use_remove_padding=true
33 | 


--------------------------------------------------------------------------------
/examples/sft/multiturn/run_qwen_05_sp2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | 
 4 | if [ "$#" -lt 2 ]; then
 5 |     echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]"
 6 |     exit 1
 7 | fi
 8 | 
 9 | nproc_per_node=$1
10 | save_path=$2
11 | 
12 | # Shift the arguments so $@ refers to the rest
13 | shift 2
14 | 
15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
16 |      -m verl.trainer.fsdp_sft_trainer \
17 |     data.train_files=$HOME/data/multiturn/train.parquet \
18 |     data.val_files=$HOME/data/multiturn/test.parquet \
19 |     data.multiturn.enable=true \
20 |     data.multiturn.messages_key=messages \
21 |     data.micro_batch_size=4 \
22 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
23 |     trainer.default_local_dir=$save_path \
24 |     trainer.project_name=multiturn-sft \
25 |     trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \
26 |     trainer.logger=['console'] \
27 |     trainer.total_training_steps=1 \
28 |     trainer.default_hdfs_dir=null $@ \
29 |     ulysses_sequence_parallel_size=2 \
30 |     use_remove_padding=true


--------------------------------------------------------------------------------
/examples/split_placement/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 main_ppo_split.py \
 4 |     algorithm.adv_estimator=gae \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     data.filter_overlong_prompts=True \
11 |     data.truncation='error' \
12 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
13 |     actor_rollout_ref.actor.optim.lr=1e-6 \
14 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
15 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \
16 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
17 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
18 |     actor_rollout_ref.actor.use_kl_loss=False \
19 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
20 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
21 |     actor_rollout_ref.rollout.name=vllm \
22 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
23 |     critic.optim.lr=1e-5 \
24 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
25 |     critic.model.enable_gradient_checkpointing=False \
26 |     critic.ppo_micro_batch_size_per_gpu=8 \
27 |     critic.model.fsdp_config.param_offload=False \
28 |     critic.model.fsdp_config.optimizer_offload=False \
29 |     algorithm.use_kl_in_reward=False \
30 |     trainer.critic_warmup=0 \
31 |     trainer.logger=['console','wandb'] \
32 |     trainer.project_name='verl_example_gsm8k' \
33 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
34 |     trainer.n_gpus_per_node=8 \
35 |     trainer.nnodes=1 \
36 |     trainer.save_freq=-1 \
37 |     trainer.total_epochs=15 $@
38 | 


--------------------------------------------------------------------------------
/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | gsm8k_train_path=$HOME/data/rlhf/gsm8k/train.parquet
 4 | gsm8k_val_path=$HOME/data/rlhf/math/test.parquet
 5 | model_path=Qwen/Qwen2-72B-Instruct
 6 | 
 7 | python3 -m verl.trainer.main_ppo \
 8 |     algorithm.adv_estimator=grpo \
 9 |     data.train_files=$data_path \
10 |     data.val_files=$gsm8k_val_path \
11 |     data.train_batch_size=1024 \
12 |     data.max_prompt_length=512 \
13 |     data.max_response_length=512 \
14 |     data.filter_overlong_prompts=True \
15 |     data.truncation='error' \
16 |     actor_rollout_ref.model.path=model_path \
17 |     actor_rollout_ref.actor.optim.lr=1e-6 \
18 |     actor_rollout_ref.model.use_remove_padding=True \
19 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
20 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
21 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
22 |     actor_rollout_ref.actor.use_kl_loss=True \
23 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
24 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
25 |     actor_rollout_ref.actor.entropy_coeff=0 \
26 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
27 |     actor_rollout_ref.actor.fsdp_config.param_offload=True \
28 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
29 |     actor_rollout_ref.rollout.tensor_model_parallel_size=16 \
30 |     actor_rollout_ref.rollout.name=vllm \
31 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
32 |     actor_rollout_ref.rollout.n=5 \
33 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
34 |     algorithm.use_kl_in_reward=False \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console','wandb'] \
37 |     trainer.project_name='verl_grpo_example_gsm8k' \
38 |     trainer.experiment_name='Qwen2_72B_Instruct' \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=4 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=5 \
43 |     trainer.total_epochs=1 $@


--------------------------------------------------------------------------------
/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | #### important: vllm version must be >= 0.8.3
 4 | 
 5 | gsm8k_train_path=$HOME/data/rlhf/gsm8k/train.parquet
 6 | gsm8k_val_path=$HOME/data/rlhf/math/test.parquet
 7 | model_path=Qwen/Qwen2-72B-Instruct
 8 | 
 9 | python3 -m verl.trainer.main_ppo \
10 |     algorithm.adv_estimator=grpo \
11 |     data.train_files=$gsm8k_train_path \
12 |     data.val_files=$gsm8k_val_path \
13 |     data.train_batch_size=1024 \
14 |     data.max_prompt_length=512 \
15 |     data.max_response_length=512 \
16 |     data.filter_overlong_prompts=True \
17 |     data.truncation='error' \
18 |     actor_rollout_ref.model.path=$model_path \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.model.use_remove_padding=True \
21 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
22 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
23 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
24 |     actor_rollout_ref.actor.use_kl_loss=True \
25 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
26 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
27 |     actor_rollout_ref.actor.entropy_coeff=0 \
28 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
29 |     actor_rollout_ref.actor.fsdp_config.param_offload=True \
30 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
31 |     actor_rollout_ref.rollout.tensor_model_parallel_size=16 \
32 |     actor_rollout_ref.rollout.name=vllm \
33 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
34 |     actor_rollout_ref.rollout.n=5 \
35 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
36 |     algorithm.use_kl_in_reward=False \
37 |     trainer.critic_warmup=0 \
38 |     trainer.logger=['console','wandb'] \
39 |     trainer.project_name='verl_grpo_example_gsm8k' \
40 |     trainer.experiment_name='Qwen2_72B_Instruct' \
41 |     trainer.n_gpus_per_node=8 \
42 |     trainer.nnodes=4 \
43 |     trainer.save_freq=-1 \
44 |     trainer.test_freq=5 \
45 |     trainer.total_epochs=1 $@


--------------------------------------------------------------------------------
/recipe/dapo/prepare_dapo_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -uxo pipefail
 3 | 
 4 | export VERL_HOME=${VERL_HOME:-"${HOME}/verl"}
 5 | export TRAIN_FILE=${TRAIN_FILE:-"${VERL_HOME}/data/dapo-math-17k.parquet"}
 6 | export TEST_FILE=${TEST_FILE:-"${VERL_HOME}/data/aime-2024.parquet"}
 7 | export OVERWRITE=${OVERWRITE:-0}
 8 | 
 9 | mkdir -p "${VERL_HOME}/data"
10 | 
11 | if [ ! -f "${TRAIN_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
12 |   wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k/resolve/main/data/dapo-math-17k.parquet?download=true"
13 | fi
14 | 
15 | if [ ! -f "${TEST_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then
16 |   wget -O "${TEST_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/AIME-2024/resolve/main/data/aime-2024.parquet?download=true"
17 | fi
18 | 


--------------------------------------------------------------------------------
/recipe/dapo/src/config/dapo_trainer.yaml:
--------------------------------------------------------------------------------
 1 | hydra:
 2 |   searchpath:
 3 |     - file://verl/trainer/config
 4 | 
 5 | defaults:
 6 |   - ppo_trainer
 7 |   - _self_
 8 | 
 9 | data:
10 |   gen_batch_size: ${data.train_batch_size}
11 | 
12 | reward_model:
13 |   reward_manager: dapo
14 |   overlong_buffer: 
15 |     enable: False # We try to avoid forgetting to set enable
16 |     len: 0
17 |     penalty_factor: 0.0
18 |     log: False
19 | 
20 | algorithm:
21 |   filter_groups:
22 |     enable: False # We try to avoid forgetting to set enable
23 |     metric: null # acc / score / seq_reward / seq_final_reward / ...
24 |     max_num_gen_batches: 0 # Non-positive values mean no upper limit
25 | 
26 | trainer:
27 |   project_name: verl-dapo
28 | 


--------------------------------------------------------------------------------
/recipe/drgrpo/README.md:
--------------------------------------------------------------------------------
 1 | # Dr. GRPO Open-Source Implementation
 2 | 
 3 | 
 4 | https://github.com/sail-sg/understand-r1-zero
 5 | 
 6 | 
 7 | This paper suggests a way to calculate the unbiased policy gradient.
 8 | 
 9 | 
10 | ## Configuration
11 | ```yaml
12 | actor_rollout_ref:
13 |   actor:
14 |     loss_agg_mode: "seq-mean-token-sum-norm" # turn off seq-dim averaging
15 |     use_kl_loss: False
16 | algorithm:
17 |   norm_adv_by_std_in_grpo: False # turn off standard deviation norm
18 | ```
19 | 
20 | , with all other parameters set same as GRPO.
21 | 


--------------------------------------------------------------------------------
/recipe/prime/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 PRIME team and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/recipe/r1/README.md:
--------------------------------------------------------------------------------
 1 | # DeepSeek R1 Reproduction
 2 | 
 3 | This recipe is under development, if you are interested, checkout the TODO list and join this project! https://github.com/volcengine/verl/issues/708 
 4 | 
 5 | ## Reproducing Evaluation
 6 | 
 7 | Eval Results of DS-R1-Distill-Qwen2.5-1.5B (k=8)
 8 | 
 9 | Dataset | Test Results | Reported
10 | -- | -- | --
11 | GPQA Diamond | 35.3 | 33.8
12 | LiveCodeBench | 16.9 | 16.9
13 | AIME 2024 | 30.4 | 28.9
14 | CNMO 2024 (en) | 45.1 | -
15 | CNMO 2024 (zh) | 41.0 | -
16 | 
17 | ---
18 | 
19 | Eval Results (DS-R1)
20 | 
21 | Dataset | Test Results (k=1) | Test Results (k=4) | Reported
22 | -- | -- | -- | --
23 | GPQA Diamond | 67.7 | 69.6 | 71.5
24 | LiveCodeBench | 64.7 | 63.1 | 65.9
25 | AIME 2024 | 86.7 | 79.2 | 79.8
26 | CNMO 2024 | 75.0 | 78.5 | 78.8
27 | 


--------------------------------------------------------------------------------
/recipe/r1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/recipe/r1/config/evaluation.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
 3 |   prompt_key: prompt
 4 |   response_key: responses
 5 |   data_source_key: data_source
 6 |   reward_model_key: reward_model
 7 | 
 8 | custom_reward_function:
 9 |   path: null
10 |   name: compute_score
11 | 
12 | ray_init:
13 |   num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.


--------------------------------------------------------------------------------
/recipe/r1/reward_score.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def reward_func(data_source, solution_str, ground_truth, extra_info=None):
17 |     if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]:
18 |         from recipe.r1.tasks import math
19 | 
20 |         return math.compute_score(solution_str, ground_truth)
21 |     elif data_source == "Idavidrein/gpqa":
22 |         from recipe.r1.tasks import gpqa
23 | 
24 |         return gpqa.compute_score(solution_str, ground_truth)
25 |     elif data_source in ["livecodebench/code_generation_lite", "livecodebench/code_generation"]:
26 |         from recipe.r1.tasks import livecodebench
27 | 
28 |         return livecodebench.compute_score(solution_str, ground_truth)
29 |     else:
30 |         raise NotImplementedError
31 | 


--------------------------------------------------------------------------------
/recipe/r1/run_r1_distill_qwen.sh:
--------------------------------------------------------------------------------
 1 | MODEL_PATH=Qwen/DeepSeek-R1-Distill-Qwen-1.5B
 2 | DATA_PATH=/workspace/datasets/r1_bench
 3 | 
 4 | # Eval Data Process
 5 | python3 -m recipe.r1.data_process \
 6 |     --local_dir $DATA_PATH \
 7 |     --tasks all
 8 | 
 9 | # Generation
10 | python3 -m verl.trainer.main_generation \
11 |     trainer.nnodes=1 \
12 |     trainer.n_gpus_per_node=8 \
13 |     data.path=$DATA_PATH/test.parquet \
14 |     data.prompt_key=prompt \
15 |     data.batch_size=1024 \
16 |     data.n_samples=8 \
17 |     data.output_path=$DATA_PATH/test-output-8.parquet \
18 |     model.path=$MODEL_PATH \
19 |     rollout.temperature=0.6 \
20 |     rollout.top_p=0.95 \
21 |     rollout.prompt_length=1024 \
22 |     rollout.response_length=32768 \
23 |     rollout.tensor_model_parallel_size=1 \
24 |     rollout.gpu_memory_utilization=0.9 \
25 |     rollout.max_num_batched_tokens=65536
26 | 
27 | # Evaluation
28 | python3 -m recipe.r1.main_eval \
29 |     data.path=$DATA_PATH/test-output-8.parquet \
30 |     data.prompt_key=prompt \
31 |     data.response_key=responses \
32 |     custom_reward_function.path=recipe/r1/reward_score.py \
33 |     custom_reward_function.name=reward_func
34 | 


--------------------------------------------------------------------------------
/recipe/r1/tasks/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/recipe/r1/tasks/gpqa.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import re
16 | 
17 | # Extraction Template from https://github.com/openai/simple-evals/blob/90e3e821cabba2aeb6be651dcb662b253df04225/common.py#L25
18 | ANSWER_PATTERN_MULTICHOICE = r"(?i)Answer[ \t]*:[ \t]*\$?([A-D])\$?"
19 | 
20 | 
21 | def compute_score(solution_str, ground_truth) -> float:
22 |     match = re.search(ANSWER_PATTERN_MULTICHOICE, solution_str)
23 |     extracted_answer = match.group(1) if match else None
24 |     score = 1.0 if extracted_answer == ground_truth else 0.0
25 |     return score
26 | 


--------------------------------------------------------------------------------
/recipe/r1/tasks/math.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import contextlib
15 | 
16 | try:
17 |     from math_verify.metric import math_metric
18 |     from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig
19 | except ImportError:
20 |     print("To use Math-Verify, please install it first by running `pip install math-verify`.")
21 | 
22 | 
23 | def compute_score(model_output: str, ground_truth: str) -> bool:
24 |     verify_func = math_metric(
25 |         gold_extraction_target=(LatexExtractionConfig(),),
26 |         pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
27 |     )
28 |     ret_score = 0.0
29 | 
30 |     # Wrap the ground truth in \boxed{} format for verification
31 |     ground_truth_boxed = "\\boxed{" + ground_truth + "}"
32 |     with contextlib.suppress(Exception):
33 |         ret_score, _ = verify_func([ground_truth_boxed], [model_output])
34 | 
35 |     return ret_score
36 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # requirements.txt records the full set of dependencies for development
 2 | accelerate
 3 | codetiming
 4 | datasets
 5 | dill
 6 | flash-attn
 7 | hydra-core
 8 | liger-kernel
 9 | numpy
10 | pandas
11 | datasets
12 | peft
13 | pyarrow>=15.0.0
14 | pybind11
15 | pylatexenc
16 | pre-commit
17 | ray[default]
18 | tensordict<=0.6.2
19 | torchdata
20 | transformers
21 | # vllm==0.6.3.post1
22 | wandb
23 | 


--------------------------------------------------------------------------------
/requirements_sglang.txt:
--------------------------------------------------------------------------------
 1 | # requirements.txt records the full set of dependencies for development
 2 | accelerate
 3 | codetiming
 4 | datasets
 5 | dill
 6 | flash-attn
 7 | hydra-core
 8 | numpy
 9 | pandas
10 | datasets
11 | peft
12 | pyarrow>=15.0.0
13 | pybind11
14 | pylatexenc
15 | ray[default]>=2.10
16 | tensordict<=0.6.2
17 | torchdata
18 | torchvision
19 | transformers
20 | wandb
21 | sglang[all]==0.4.4.post4
22 | torch-memory-saver>=0.0.5


--------------------------------------------------------------------------------
/scripts/install_deepeyes.sh:
--------------------------------------------------------------------------------
 1 | pip install evaluate
 2 | pip install vllm==0.8.2
 3 | pip install -U pynvml
 4 | pip install mathruler
 5 | pip install pydantic --upgrade
 6 | pip install openai --upgrade
 7 | pip install tensordict==0.6.2
 8 | pip install triton==3.1.0
 9 | pip install qwen_vl_utils
10 | pip install math_verify
11 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/distributed/run_all.sh:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #!/usr/bin/env bash
16 | 
17 | set -e -x
18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py


--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/create_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from torch.utils import data
18 | 
19 | from tests.e2e.envs.digit_completion import DigitCompletion
20 | 
21 | if __name__ == "__main__":
22 |     simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9)
23 |     all_prompts = simple_task.get_all_prompts()
24 | 
25 |     # 21 * 6 * 4
26 |     train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2])
27 |     train_data = list(train_data)
28 |     test_data = list(test_data)
29 | 
30 |     train_data = [[{"role": "user", "content": str(item)}] for item in train_data]
31 |     test_data = [[{"role": "user", "content": str(item)}] for item in test_data]
32 | 
33 |     print(f"Size of train: {len(train_data)}, size of test: {len(test_data)}")
34 | 
35 |     train_data = {"prompt": train_data}
36 |     test_data = {"prompt": test_data}
37 | 
38 |     model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)))
39 | 
40 |     import pandas as pd
41 | 
42 |     train_data_frame = pd.DataFrame(train_data)
43 |     test_data_frame = pd.DataFrame(test_data)
44 | 
45 |     train_data_frame.to_parquet(os.path.join(model_folder, "train.parquet"))
46 |     test_data_frame.to_parquet(os.path.join(model_folder, "test.parquet"))
47 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/tests/e2e/arithmetic_sequence/data/test.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/tests/e2e/arithmetic_sequence/data/train.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "LlamaForCausalLM"
 4 |   ],
 5 |   "attention_bias": false,
 6 |   "attention_dropout": 0.0,
 7 |   "bos_token_id": null,
 8 |   "eos_token_id": 1,
 9 |   "hidden_act": "silu",
10 |   "hidden_size": 128,
11 |   "initializer_range": 0.02,
12 |   "intermediate_size": 344,
13 |   "max_position_embeddings": 2048,
14 |   "mlp_bias": false,
15 |   "model_type": "llama",
16 |   "num_attention_heads": 4,
17 |   "num_hidden_layers": 4,
18 |   "num_key_value_heads": 4,
19 |   "pad_token_id": 2,
20 |   "pretraining_tp": 1,
21 |   "rms_norm_eps": 1e-06,
22 |   "rope_scaling": null,
23 |   "rope_theta": 10000.0,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.43.3",
27 |   "use_cache": true,
28 |   "vocab_size": 16
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/generation_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "_from_model_config": true,
3 |   "eos_token_id": 1,
4 |   "pad_token_id": 2,
5 |   "transformers_version": "4.43.3"
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/tests/e2e/arithmetic_sequence/model/model.safetensors


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "char_ords": [
 3 |         48,
 4 |         49,
 5 |         50,
 6 |         51,
 7 |         52,
 8 |         53,
 9 |         54,
10 |         55,
11 |         56,
12 |         57,
13 |         44,
14 |         58
15 |     ],
16 |     "model_max_length": 2048,
17 |     "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}"
18 | }


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
 1 | # Digit completion
 2 | 
 3 | This is an example of solving a digit completion problem. The problem is defined as below:
 4 | 
 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers.
 6 | If the max number is reached, the next number should be modulo with max number.
 7 | 
 8 | For example,
 9 | - prompt = [1, 2, 3]
10 | - N = 5
11 | - max_number = 6
12 | 
13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1].
14 | 
15 | # Environment definition
16 | 
17 | The core definition of the task is defined in verl/envs/digit_completion/task.py
18 | 
19 | It is highly recommended to take a look at it for better understanding.
20 | 
21 | 
22 | 
23 | # Run experiments
24 | 
25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory)
26 | 
27 | ```bash
28 | # cd examples/arithmetic_sequence/rl
29 | 
30 | # Specify the config path and config name (current working dir)
31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron'
32 | 
33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using:
34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config
35 | 
36 | ```
37 | 
38 | 


--------------------------------------------------------------------------------
/tests/e2e/check_custom_rwd_fn.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | 
17 | 
18 | def check_congratulations_in_file(output_file):
19 |     with open(output_file) as f:
20 |         output = f.read()
21 | 
22 |     success_message = "Congratulations!!! You have called my_reward_function successfully!!!"
23 |     assert success_message in output, f"Success message of my_reward_function not found in {output_file}"
24 |     print("Check passes")
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     parser = argparse.ArgumentParser()
29 |     parser.add_argument("--output_file", required=True, type=str)
30 | 
31 |     args = parser.parse_args()
32 | 
33 |     check_congratulations_in_file(args.output_file)
34 | 


--------------------------------------------------------------------------------
/tests/e2e/check_results.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | 
17 | import numpy as np
18 | 
19 | 
20 | def extract_reward_from_line(line):
21 |     # TODO: this function needs error handling
22 |     try:
23 |         key_vals = line.split(" - ")
24 |         for key_val in key_vals:
25 |             key, val = key_val.split(":")
26 |             if key == "critic/rewards/mean":
27 |                 reward = float(val)
28 |                 return reward
29 |         return -np.inf
30 |     except Exception:
31 |         return -np.inf
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     parser = argparse.ArgumentParser()
36 |     parser.add_argument("--output_file", required=True, type=str)
37 |     parser.add_argument("--target", type=float, default=0.2, help="target reward score")
38 | 
39 |     args = parser.parse_args()
40 | 
41 |     with open(args.output_file) as f:
42 |         output = f.read().split("\n")
43 | 
44 |     best_reward = -np.inf
45 |     for line in output:
46 |         if line.startswith("step"):
47 |             reward = extract_reward_from_line(line)
48 |             if reward > best_reward:
49 |                 best_reward = reward
50 | 
51 |     print(f"Best reward is {best_reward}")
52 |     assert best_reward > args.target, f"Best reward must be greater than {args.target}. best_reward: {best_reward}"
53 |     print("Check passes")
54 | 


--------------------------------------------------------------------------------
/tests/e2e/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .digit_completion import DigitCompletion
16 | 
17 | __all__ = ["DigitCompletion"]
18 | 


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from transformers import AutoTokenizer, LlamaConfig
16 | 
17 | from .task import DigitCompletion, generate_ground_truth_response
18 | from .tokenizer import CharTokenizer
19 | 
20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True)
21 | 
22 | __all__ = ["DigitCompletion", "generate_ground_truth_response", "CharTokenizer"]
23 | 


--------------------------------------------------------------------------------
/tests/e2e/run_r1_distill_qwen_aime24_eval.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -xeuo pipefail
 3 | 
 4 | huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
 5 |     --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
 6 | 
 7 | python3 -m verl.trainer.main_generation \
 8 |     trainer.nnodes=1 \
 9 |     trainer.n_gpus_per_node=8 \
10 |     data.path=$HOME/data/r1/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.batch_size=1024 \
13 |     data.n_samples=1 \
14 |     data.output_path=$HOME/data/r1/test-output-k1.parquet \
15 |     model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
16 |     rollout.temperature=0.6 \
17 |     rollout.top_p=0.95 \
18 |     rollout.prompt_length=1024 \
19 |     rollout.response_length=32768 \
20 |     rollout.tensor_model_parallel_size=1 \
21 |     rollout.gpu_memory_utilization=0.95 \
22 |     rollout.max_num_batched_tokens=65536 \
23 |     rollout.enforce_eager=False \
24 |     rollout.free_cache_engine=False
25 | 
26 | python3 -m recipe.r1.main_eval \
27 |     data.path=$HOME/data/r1/test-output-k1.parquet \
28 |     data.prompt_key=prompt \
29 |     data.response_key=responses \
30 |     custom_reward_function.path=recipe/r1/reward_score.py \
31 |     custom_reward_function.name=reward_func


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt"
 6 | 
 7 | export PATH=$PATH:~/.local/bin
 8 | 
 9 | rm -rf $OUTPUT_FILE
10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
11 |     algorithm.adv_estimator=gae \
12 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
13 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
14 |     data.train_batch_size=800 \
15 |     data.max_prompt_length=16 \
16 |     data.max_response_length=32 \
17 |     data.return_raw_input_ids=True \
18 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
19 |     actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \
20 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \
21 |     actor_rollout_ref.actor.entropy_coeff=0 \
22 |     actor_rollout_ref.actor.optim.lr=1e-4 \
23 |     actor_rollout_ref.actor.use_kl_loss=False \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \
25 |     actor_rollout_ref.rollout.name=hf \
26 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
27 |     critic.ppo_micro_batch_size_per_gpu=200 \
28 |     critic.model.path=tests/e2e/arithmetic_sequence/model \
29 |     critic.optim.lr=1e-3 \
30 |     algorithm.use_kl_in_reward=False \
31 |     trainer.total_epochs=200 \
32 |     trainer.experiment_name=arithmetic_sequences \
33 |     trainer.logger=['console'] \
34 |     trainer.n_gpus_per_node=1 \
35 |     trainer.test_freq=1 \
36 |     trainer.save_freq=110 | tee $OUTPUT_FILE;
37 | 
38 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE
39 | rm -rf $OUTPUT_FILE
40 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer_fire_sampling.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt"
 6 | 
 7 | export PATH=$PATH:~/.local/bin
 8 | 
 9 | rm -rf $OUTPUT_FILE
10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
11 |     algorithm.adv_estimator=gae \
12 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
13 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
14 |     data.train_batch_size=800 \
15 |     data.val_batch_size=200 \
16 |     data.max_prompt_length=16 \
17 |     data.max_response_length=32 \
18 |     data.return_raw_input_ids=True \
19 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
20 |     actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \
21 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \
22 |     actor_rollout_ref.actor.entropy_coeff=0 \
23 |     actor_rollout_ref.actor.optim.lr=1e-4 \
24 |     actor_rollout_ref.actor.use_kl_loss=False \
25 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \
26 |     actor_rollout_ref.rollout.name=hf \
27 |     actor_rollout_ref.rollout.use_fire_sampling=True \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
29 |     critic.ppo_micro_batch_size_per_gpu=200 \
30 |     critic.model.path=tests/e2e/arithmetic_sequence/model \
31 |     critic.optim.lr=1e-3 \
32 |     algorithm.use_kl_in_reward=False \
33 |     trainer.total_epochs=200 \
34 |     trainer.experiment_name=arithmetic_sequences \
35 |     trainer.logger=['console'] \
36 |     trainer.n_gpus_per_node=1 \
37 |     trainer.test_freq=1 \
38 |     trainer.save_freq=110 | tee $OUTPUT_FILE;
39 | 
40 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE --target 0.19
41 | rm -rf $OUTPUT_FILE
42 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer_rmpad.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B
 6 | 
 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
 8 |     algorithm.adv_estimator=gae \
 9 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
10 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
11 |     actor_rollout_ref.actor.use_kl_loss=False \
12 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
13 |     actor_rollout_ref.rollout.name=vllm \
14 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
15 |     actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \
16 |     critic.model.path=Qwen/Qwen2.5-0.5B \
17 |     critic.model.use_remove_padding=True \
18 |     algorithm.use_kl_in_reward=False \
19 |     trainer.total_epochs=1


--------------------------------------------------------------------------------
/tests/e2e/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -xeuo pipefail
 3 | 
 4 | # Get the configuration name and engine name from arguments
 5 | CONFIG_NAME="$1"
 6 | ENGINE="${2:-vllm}"
 7 | 
 8 | # Download model if needed
 9 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir "$HOME/models/Qwen/Qwen2.5-0.5B"
10 | 
11 | # Run the training with the specified configuration
12 | python3 -m verl.trainer.main_ppo \
13 |     --config-name "$CONFIG_NAME" "$@" 


--------------------------------------------------------------------------------
/tests/e2e/sft/run_sft.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -xeuo pipefail
 3 | 
 4 | ENTRYPOINT=${ENTRYPOINT:-"-m verl.trainer.fsdp_sft_trainer"}
 5 | 
 6 | NUM_GPUS=${NUM_GPUS:-8}
 7 | 
 8 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
 9 | MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
10 | huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
11 | 
12 | TRAIN_FILES=${TRAIN_FILES:-$HOME/data/gsm8k/train.parquet}
13 | VAL_FILES=${VAL_FILES:-$HOME/data/gsm8k/test.parquet}
14 | 
15 | SP_SIZE=${SP_SIZE:-1}
16 | LIGER=${LIGER:-False}
17 | MULTITURN=${MULTITURN:-False}
18 | LORA_RANK=${LORA_RANK:-0}
19 | RM_PAD=${RM_PAD:-True}
20 | 
21 | micro_bsz=2
22 | NUM_GPUS=8
23 | 
24 | project_name="verl-test"
25 | exp_name="$(basename "${MODEL_ID,,}")-sft-minimal"
26 | ckpts_home=${ckpts_home:-$HOME/${project_name}/${exp_name}}
27 | 
28 | mkdir -p "${ckpts_home}"
29 | 
30 | torchrun --standalone --nnodes=1 --nproc_per_node=${NUM_GPUS} ${ENTRYPOINT} \
31 |     data.train_files="${TRAIN_FILES}" \
32 |     data.val_files="${VAL_FILES}" \
33 |     data.prompt_key=extra_info \
34 |     data.response_key=extra_info \
35 |     data.prompt_dict_keys=['question'] \
36 |     data.response_dict_keys=['answer'] \
37 |     data.multiturn.enable="${MULTITURN}" \
38 |     data.multiturn.messages_key=messages \
39 |     optim.lr=1e-4 \
40 |     data.micro_batch_size_per_gpu=${micro_bsz} \
41 |     model.partial_pretrain="${MODEL_PATH}" \
42 |     model.lora_rank="${LORA_RANK}" \
43 |     model.lora_alpha=16 \
44 |     model.target_modules=all-linear \
45 |     model.use_liger="${LIGER}" \
46 |     ulysses_sequence_parallel_size="${SP_SIZE}" \
47 |     use_remove_padding="${RM_PAD}" \
48 |     trainer.default_local_dir="${ckpts_home}" \
49 |     trainer.project_name="${project_name}" \
50 |     trainer.experiment_name="${exp_name}" \
51 |     trainer.total_training_steps=1 \
52 |     trainer.logger=['console'] \
53 |     trainer.default_hdfs_dir=null $@
54 | 
55 | rm -rf "${ckpts_home:?}/*"


--------------------------------------------------------------------------------
/tests/generation/run_gen_qwen05.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Tested with 1 & 4 GPUs
 3 | set -xeuo pipefail
 4 | 
 5 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
 6 | 
 7 | NGPUS_PER_NODE=${NGPUS_PER_NODE:-4}
 8 | OUTPUT_PATH=${OUTPUT_PATH:-$HOME/data/gen/qwen_05_gen_test.parquet}
 9 | GEN_TP=${GEN_TP:-2}  # Default tensor parallel size to 2
10 | 
11 | python3 -m verl.trainer.main_generation \
12 |     trainer.nnodes=1 \
13 |     trainer.n_gpus_per_node="${NGPUS_PER_NODE}" \
14 |     data.path="${HOME}/data/gsm8k/test.parquet" \
15 |     data.prompt_key=prompt \
16 |     data.n_samples=1 \
17 |     data.output_path="${OUTPUT_PATH}" \
18 |     model.path="${MODEL_ID}" \
19 |     +model.trust_remote_code=True \
20 |     rollout.temperature=1.0 \
21 |     rollout.top_k=50 \
22 |     rollout.top_p=0.7 \
23 |     rollout.prompt_length=2048 \
24 |     rollout.response_length=1024 \
25 |     rollout.tensor_model_parallel_size="${GEN_TP}" \
26 |     rollout.gpu_memory_utilization=0.8
27 | 


--------------------------------------------------------------------------------
/tests/gpu_utility/test_ops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_flash_attn_cross_entropy():
17 |     import torch
18 |     from flash_attn.ops.triton.cross_entropy import cross_entropy_loss
19 |     from torch import nn
20 | 
21 |     from verl.utils.debug import log_gpu_memory_usage
22 |     from verl.utils.torch_functional import logprobs_from_logits_naive
23 | 
24 |     log_gpu_memory_usage("At start")
25 | 
26 |     hidden_states = torch.randn(size=(2048, 5120), device="cuda", requires_grad=True, dtype=torch.bfloat16)
27 | 
28 |     linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device="cuda", dtype=torch.bfloat16)
29 | 
30 |     logits = linear(hidden_states)
31 | 
32 |     # logits = logits.float()
33 |     labels = torch.randint(low=0, high=155136, size=(2048,), device="cuda")
34 | 
35 |     log_gpu_memory_usage("before computation")
36 |     # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True)
37 |     output = -cross_entropy_loss(logits, labels)[0]
38 |     # output = logprobs_from_logits(logits, labels)
39 |     log_gpu_memory_usage("After forward")
40 |     output.sum().backward()
41 |     log_gpu_memory_usage("After backward")
42 | 
43 |     groundtruth = logprobs_from_logits_naive(logits.float(), labels)
44 | 
45 |     torch.testing.assert_close(output, groundtruth)
46 | 


--------------------------------------------------------------------------------
/tests/kill_github_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ "$#" -ne 1 ]; then
 4 |     echo "Usage: $0 YOUR_GITHUB_TOKEN"
 5 |     echo "Please provide exactly one input argument for your github token."
 6 |     exit 1
 7 | fi
 8 | 
 9 | # Set your GitHub repository details
10 | OWNER="volcengine"
11 | REPO="verl"
12 | TOKEN=$1
13 | 
14 | # API URL for workflow runs
15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued"
16 | 
17 | # Check required commands
18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; }
19 | 
20 | # Get queued workflow runs
21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL")
22 | 
23 | # Run this for debugging
24 | # echo $response
25 | 
26 | # Extract run IDs
27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id')
28 | 
29 | if [ -z "$queued_run_ids" ]; then
30 |     echo "No queued workflow runs found."
31 |     exit 0
32 | fi
33 | 
34 | # Cancel each queued run
35 | for run_id in $queued_run_ids; do
36 |     echo "Cancelling run $run_id"
37 |     cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel"
38 |     curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url"
39 | done
40 | 
41 | echo "Cancelled all queued workflow runs."
42 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
 1 | # Detached Worker
 2 | ## How to run (Only on a single node)
 3 | - Start a local ray cluster: 
 4 | ```bash
 5 | ray start --head --port=6379
 6 | ```
 7 | - Run the server
 8 | ```bash
 9 | python3 server.py
10 | ```
11 | - On another terminal, Run the client
12 | ```bash
13 | python3 client.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ray start --head --port=6379
3 | python3 server.py
4 | python3 client.py
5 | ray stop --force


--------------------------------------------------------------------------------
/tests/ray/test_check_worker_alive.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import subprocess
17 | import time
18 | 
19 | 
20 | def test():
21 |     wait_time = 10
22 | 
23 |     my_env = os.environ.copy()
24 |     my_env["WAIT_TIME"] = str(wait_time)
25 | 
26 |     p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)
27 | 
28 |     count = 0
29 |     while b"foo started" not in p.stdout.read():
30 |         time.sleep(1)
31 |         count += 1
32 |         if count > 40:
33 |             raise RuntimeError("timeout for start foo in check_worker_alive/main.py")
34 | 
35 |     print(
36 |         time.time(),
37 |         f"wait 1.5 wait time {wait_time * 1.5} to let signal returned to process but still not exceed process wait time",
38 |     )
39 |     time.sleep(wait_time * 1.5)
40 |     print(time.time(), "start checking")
41 |     assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
42 |     assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
43 |     print("test passed")
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     test()
48 | 


--------------------------------------------------------------------------------
/tests/ray/test_ray_local_envs.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | e2e test verl.single_controller.ray
16 | """
17 | 
18 | import os
19 | 
20 | import ray
21 | 
22 | from verl.single_controller.base.worker import Worker
23 | from verl.single_controller.ray.base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup
24 | 
25 | 
26 | @ray.remote
27 | class TestActor(Worker):
28 |     def __init__(self) -> None:
29 |         super().__init__()
30 | 
31 |     def getenv(self, key):
32 |         val = os.getenv(key, f"{key} not set")
33 |         return val
34 | 
35 | 
36 | def test_basics():
37 |     ray.init()
38 | 
39 |     # create 4 workers, each hold a GPU
40 |     resource_pool = RayResourcePool([4], use_gpu=True)
41 |     class_with_args = RayClassWithInitArgs(cls=TestActor)
42 | 
43 |     worker_group = RayWorkerGroup(
44 |         resource_pool=resource_pool, ray_cls_with_init=class_with_args, name_prefix="worker_group_basic"
45 |     )
46 | 
47 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE")
48 |     assert output == ["4", "4", "4", "4"]
49 | 
50 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK")
51 |     assert set(output) == set(["0", "1", "2", "3"])
52 | 
53 |     ray.shutdown()
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     test_basics()
58 | 


--------------------------------------------------------------------------------
/tests/ray/test_rvdz.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
18 | @ray.remote
19 | class TestWorker:
20 |     def __init__(self, rank, world_size, group_name):
21 |         self.rank = rank
22 |         self.world_size = world_size
23 |         self.group_name = group_name
24 |         self.communicator = None
25 | 
26 |     def init(self):
27 |         from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray
28 | 
29 |         self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name)
30 | 
31 |     def test(self):
32 |         if self.communicator is None:
33 |             return None
34 |         return self.communicator.rank_id()
35 | 
36 | 
37 | def test_rvdz():
38 |     ray.init()
39 | 
40 |     group_name = "test_group"
41 |     world_size = 2
42 | 
43 |     workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)]
44 | 
45 |     ray.get([worker.init.remote() for worker in workers])
46 | 
47 |     ranks = ray.get([worker.test.remote() for worker in workers])
48 | 
49 |     assert ranks == [0, 1], f"expecting [0, 1], got {ranks}"
50 | 
51 |     ray.shutdown()
52 | 


--------------------------------------------------------------------------------
/tests/sanity/check_license.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
16 | license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
17 | # Add custom license headers below
18 | license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"
19 | license_head_individual = "Copyright 2025 Individual Contributor:"
20 | license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime, license_head_individual]
21 | 
22 | from argparse import ArgumentParser
23 | from pathlib import Path
24 | 
25 | if __name__ == "__main__":
26 |     parser = ArgumentParser()
27 |     parser.add_argument("--directory", "-d", required=True, type=str)
28 |     args = parser.parse_args()
29 |     directory_in_str = args.directory
30 | 
31 |     pathlist = Path(directory_in_str).glob("**/*.py")
32 |     for path in pathlist:
33 |         # because path is object not string
34 |         path_in_str = str(path.absolute())
35 |         print(path_in_str)
36 |         with open(path_in_str, encoding="utf-8") as f:
37 |             file_content = f.read()
38 | 
39 |             has_license = False
40 |             for lh in license_headers:
41 |                 if lh in file_content:
42 |                     has_license = True
43 |                     break
44 |             assert has_license, f"file {path_in_str} does not contain license"
45 | 


--------------------------------------------------------------------------------
/tests/sanity/test_import.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_import():
17 |     import verl
18 | 
19 |     print(verl.__version__)
20 | 
21 | 
22 | def test_single_controller_import():
23 |     import verl.single_controller
24 | 
25 |     print(verl.single_controller.__version__)
26 | 


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rm_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from verl.utils import hf_tokenizer
17 | from verl.utils.dataset.rm_dataset import RMDataset
18 | 
19 | 
20 | def get_rm_data():
21 |     # prepare test dataset
22 |     url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet"
23 |     local_folder = os.path.expanduser("~/verl-data/full_hh_rlhf/rm/")
24 |     local_path = os.path.join(local_folder, "test.parquet")
25 |     os.makedirs(local_folder, exist_ok=True)
26 |     return local_path
27 | 
28 | 
29 | def test_rm_dataset():
30 |     tokenizer = hf_tokenizer("facebook/opt-1.3b")
31 |     local_path = get_rm_data()
32 |     dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
33 |     data = dataset[0]["input_ids"]
34 |     output = tokenizer.batch_decode(data)
35 |     assert len(output) > 1
36 |     assert type(output[0]) == str
37 | 


--------------------------------------------------------------------------------
/tests/verl/utils/test_module.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | # Test module for import_utils.load_extern_type testing
17 | class TestClass:
18 |     """A test class to be imported by load_extern_type"""
19 | 
20 |     def __init__(self, value=None):
21 |         self.value = value or "default"
22 | 
23 |     def get_value(self):
24 |         return self.value
25 | 
26 | 
27 | TEST_CONSTANT = "test_constant_value"
28 | 
29 | 
30 | def test_function():
31 |     return "test_function_result"
32 | 


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
18 | 
19 | with open(os.path.join(version_folder, "version/version")) as f:
20 |     __version__ = f.read().strip()
21 | 
22 | import logging
23 | 
24 | from .protocol import DataProto
25 | from .utils.logging_utils import set_basic_config
26 | 
27 | set_basic_config(level=logging.WARNING)
28 | 
29 | from . import single_controller
30 | 
31 | __all__ = ["DataProto", "__version__"]
32 | 
33 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true":
34 |     import importlib
35 | 
36 |     if importlib.util.find_spec("modelscope") is None:
37 |         raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`")
38 |     # Patch hub to download models from modelscope to speed up.
39 |     from modelscope.utils.hf_util import patch_hub
40 | 
41 |     patch_hub()
42 | 


--------------------------------------------------------------------------------
/verl/models/README.md:
--------------------------------------------------------------------------------
 1 | # Models
 2 | Common modelzoo such as huggingface/transformers stuggles when using Pytorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized with packed inputs in verl. 
 3 | ## Adding a New Huggingface Model
 4 | ### Step 1: Copy the model file from HF to verl
 5 | - Add a new file under verl/models/hf
 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf
 7 | 
 8 | ### Step 2: Modify the model file to use packed inputs
 9 | - Remove all the code related to inference (kv cache)
10 | - Modify the inputs to include only
11 |     - input_ids (total_nnz,)
12 |     - cu_seqlens (total_nnz + 1,)
13 |     - max_seqlen_in_batch: int
14 | - Note that this requires using flash attention with causal mask.
15 | 
16 | ### Step 2.5: Add tests
17 | - Add a test to compare this version and the huggingface version
18 | - Following the infrastructure and add tests to tests/models/hf
19 | 
20 | ### Step 3: Add a function to apply tensor parallelism
21 | - Please follow
22 |     - https://pytorch.org/docs/stable/distributed.tensor.parallel.html
23 |     - https://pytorch.org/tutorials/intermediate/TP_tutorial.html
24 | - General comments
25 |     - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward.
26 | 
27 | ### Step 4: Add a function to apply data parallelism
28 | - Please use FSDP2 APIs
29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413
30 | 
31 | ### Step 5: Add a function to apply pipeline parallelism
32 | - Comes in Pytorch 2.4
33 | - Currently only in alpha in nightly version
34 | - Check torchtitan for more details
35 | 
36 | 


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_llama_megatron import (
16 |     ParallelLlamaForCausalLM,
17 |     # rmpad with megatron
18 |     ParallelLlamaForCausalLMRmPad,
19 |     # rmpad with megatron and pipeline parallelism
20 |     ParallelLlamaForCausalLMRmPadPP,
21 |     ParallelLlamaForValueRmPad,
22 |     ParallelLlamaForValueRmPadPP,
23 |     # original model with megatron
24 |     ParallelLlamaModel,
25 | )
26 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelLlamaAttention
16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad
17 | from .parallel_mlp import ParallelLlamaMLP
18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | 
17 | import torch
18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
19 | from megatron.core import ModelParallelConfig
20 | from torch import nn
21 | from transformers import LlamaConfig
22 | 
23 | from verl.utils.megatron import sequence_parallel as sp_utils
24 | 
25 | 
26 | class ParallelLlamaRMSNorm(nn.Module):
27 |     def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig):
28 |         """
29 |         LlamaRMSNorm is equivalent to T5LayerNorm
30 |         """
31 |         super().__init__()
32 |         if isinstance(config.hidden_size, numbers.Integral):
33 |             normalized_shape = (config.hidden_size,)
34 |         self.normalized_shape = torch.Size(normalized_shape)
35 |         self.weight = nn.Parameter(torch.ones(self.normalized_shape))
36 |         self.variance_epsilon = config.rms_norm_eps
37 | 
38 |         if megatron_config.sequence_parallel:
39 |             sp_utils.mark_parameter_as_sequence_parallel(self.weight)
40 | 
41 |     def forward(self, hidden_states):
42 |         return fused_rms_norm_affine(
43 |             input=hidden_states,
44 |             weight=self.weight,
45 |             normalized_shape=self.normalized_shape,
46 |             eps=self.variance_epsilon,
47 |             memory_efficient=True,
48 |         )
49 | 


--------------------------------------------------------------------------------
/verl/models/mcore/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .registry import get_mcore_forward_fn, get_mcore_weight_converter, hf_to_mcore_config, init_mcore_model
17 | 
18 | __all__ = ["init_mcore_model", "hf_to_mcore_config", "get_mcore_forward_fn", "get_mcore_weight_converter"]
19 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_qwen2_megatron import (
16 |     ParallelQwen2ForCausalLM,
17 |     # rmpad with megatron
18 |     ParallelQwen2ForCausalLMRmPad,
19 |     # rmpad with megatron and pipeline parallelism
20 |     ParallelQwen2ForCausalLMRmPadPP,
21 |     ParallelQwen2ForValueRmPad,
22 |     ParallelQwen2ForValueRmPadPP,
23 |     # original model with megatron
24 |     ParallelQwen2Model,
25 | )
26 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelQwen2Attention
16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad
17 | from .parallel_mlp import ParallelQwen2MLP
18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | 
17 | import torch
18 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
19 | from megatron.core import ModelParallelConfig
20 | from torch import nn
21 | from transformers import Qwen2Config
22 | 
23 | from verl.utils.megatron import sequence_parallel as sp_utils
24 | 
25 | 
26 | class ParallelQwen2RMSNorm(nn.Module):
27 |     def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig):
28 |         """
29 |         Qwen2RMSNorm is equivalent to T5LayerNorm
30 |         """
31 |         super().__init__()
32 |         if isinstance(config.hidden_size, numbers.Integral):
33 |             normalized_shape = (config.hidden_size,)
34 |         self.normalized_shape = torch.Size(normalized_shape)
35 |         self.weight = nn.Parameter(torch.ones(self.normalized_shape))
36 |         self.variance_epsilon = config.rms_norm_eps
37 | 
38 |         if megatron_config.sequence_parallel:
39 |             sp_utils.mark_parameter_as_sequence_parallel(self.weight)
40 | 
41 |     def forward(self, hidden_states):
42 |         return fused_rms_norm_affine(
43 |             input=hidden_states,
44 |             weight=self.weight,
45 |             normalized_shape=self.normalized_shape,
46 |             eps=self.variance_epsilon,
47 |             memory_efficient=True,
48 |         )
49 | 


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def get_weight_loader(arch: str):
17 |     from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel
18 | 
19 |     _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {
20 |         "LlamaForCausalLM": load_state_dict_to_megatron_gptmodel,
21 |         "Qwen2ForCausalLM": load_state_dict_to_megatron_gptmodel,
22 |     }
23 | 
24 |     if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY:
25 |         return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch]
26 |     raise ValueError(
27 |         f"Model architectures {arch} loader are not supported for now. "
28 |         f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}"
29 |     )
30 | 
31 | 
32 | def get_weight_saver(arch: str):
33 |     from verl.models.mcore.saver import merge_megatron_ckpt_gptmodel, merge_megatron_ckpt_gptmodel_qwen_moe
34 | 
35 |     _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY = {
36 |         "LlamaForCausalLM": merge_megatron_ckpt_gptmodel,
37 |         "Qwen2ForCausalLM": merge_megatron_ckpt_gptmodel,
38 |         "Qwen2MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe,
39 |     }
40 |     if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY:
41 |         return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch]
42 |     raise ValueError(
43 |         f"Model architectures {arch} saver are not supported for now. "
44 |         f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY.keys()}"
45 |     )
46 | 


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
18 | 
19 | # Note(haibin.lin): single_controller.__version__ is deprecated
20 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f:
21 |     __version__ = f.read().strip()
22 | 
23 | from . import base
24 | from .base import *
25 | 
26 | __all__ = base.__all__
27 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup
17 | 
18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"]
19 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
18 | @ray.remote
19 | class WorkerGroupRegisterCenter:
20 |     def __init__(self, rank_zero_info):
21 |         self.rank_zero_info = rank_zero_info
22 | 
23 |     def get_rank_zero_info(self):
24 |         return self.rank_zero_info
25 | 
26 | 
27 | def create_worker_group_register_center(name, info):
28 |     return WorkerGroupRegisterCenter.options(name=name).remote(info)
29 | 


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup, create_colocated_worker_cls
16 | 


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/sglang/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023-2024 SGLang Team
 2 | # Licensed under the Apache License, Version 2.0 (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | #     http://www.apache.org/licenses/LICENSE-2.0
 7 | #
 8 | # Unless required by applicable law or agreed to in writing, software
 9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # ==============================================================================
14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
15 | #
16 | # Licensed under the Apache License, Version 2.0 (the "License");
17 | # you may not use this file except in compliance with the License.
18 | # You may obtain a copy of the License at
19 | #
20 | #     http://www.apache.org/licenses/LICENSE-2.0
21 | #
22 | # Unless required by applicable law or agreed to in writing, software
23 | # distributed under the License is distributed on an "AS IS" BASIS,
24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | # See the License for the specific language governing permissions and
26 | # limitations under the License.
27 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from importlib.metadata import PackageNotFoundError, version
16 | 
17 | from packaging import version as vs
18 | 
19 | from verl.utils.import_utils import is_sglang_available
20 | 
21 | 
22 | def get_version(pkg):
23 |     try:
24 |         return version(pkg)
25 |     except PackageNotFoundError:
26 |         return None
27 | 
28 | 
29 | package_name = "vllm"
30 | package_version = get_version(package_name)
31 | vllm_version = None
32 | 
33 | if package_version == "0.5.4":
34 |     vllm_version = "0.5.4"
35 |     from .vllm_v_0_5_4 import parallel_state
36 |     from .vllm_v_0_5_4.llm import LLM, LLMEngine
37 | elif package_version == "0.6.3" or package_version == "0.6.3+rocm624" or package_version == "0.6.3+rocm634":
38 |     vllm_version = "0.6.3"
39 |     from .vllm_v_0_6_3 import parallel_state
40 |     from .vllm_v_0_6_3.llm import LLM, LLMEngine
41 | elif vs.parse(package_version) >= vs.parse("0.7.0"):
42 |     # From 0.6.6.post2 on, vllm supports SPMD inference
43 |     # See https://github.com/vllm-project/vllm/pull/12071
44 | 
45 |     from vllm import LLM
46 |     from vllm.distributed import parallel_state
47 | else:
48 |     if not is_sglang_available():
49 |         raise ValueError(
50 |             f"vllm version {package_version} not supported and SGLang also not Found. Currently supported vllm versions are 0.6.3 and 0.7.0+"
51 |         )
52 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models
15 | 
16 | from typing import Dict
17 | 
18 | import torch.nn as nn
19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype
20 | 
21 | 
22 | def update_hf_weight_loader():
23 |     print("no hf weight loader need to be updated")
24 |     return
25 | 
26 | 
27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
28 |     assert isinstance(actor_weights, Dict)
29 |     with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
30 |         if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights:
31 |             del actor_weights["lm_head.weight"]
32 |         vllm_model.load_weights(actor_weights.items())
33 |     for _, module in vllm_model.named_modules():
34 |         quant_method = getattr(module, "quant_method", None)
35 |         if quant_method is not None:
36 |             quant_method.process_weights_after_loading(module)
37 |         # FIXME: Remove this after Mixtral is updated
38 |         # to use quant_method.
39 |         if hasattr(module, "process_weights_after_loading"):
40 |             module.process_weights_after_loading()
41 |     vllm_model = vllm_model.cuda()
42 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader
15 | 
16 | from typing import Dict
17 | 
18 | import torch.nn as nn
19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype
20 | 
21 | 
22 | def update_hf_weight_loader():
23 |     print("no hf weight loader need to be updated")
24 |     return
25 | 
26 | 
27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
28 |     assert isinstance(actor_weights, Dict)
29 |     with set_default_torch_dtype(next(vllm_model.parameters()).dtype):  # TODO
30 |         if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights:
31 |             del actor_weights["lm_head.weight"]
32 |         vllm_model.load_weights(actor_weights.items())
33 |     for _, module in vllm_model.named_modules():
34 |         quant_method = getattr(module, "quant_method", None)
35 |         if quant_method is not None:
36 |             quant_method.process_weights_after_loading(module)
37 |         # FIXME: Remove this after Mixtral is updated
38 |         # to use quant_method.
39 |         if hasattr(module, "process_weights_after_loading"):
40 |             module.process_weights_after_loading()
41 |     vllm_model = vllm_model.cuda()
42 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
15 | 
16 | from typing import Optional
17 | 
18 | from transformers import PreTrainedTokenizer
19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup
20 | from vllm.utils import LRUCache
21 | 
22 | 
23 | class TokenizerGroup(TokenizerGroup):
24 |     """A group of tokenizers that can be used for LoRA adapters."""
25 | 
26 |     def __init__(
27 |         self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, max_input_length: Optional[int]
28 |     ):
29 |         self.enable_lora = enable_lora
30 |         self.max_input_length = max_input_length
31 |         self.tokenizer = tokenizer
32 |         self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None
33 | 
34 |     # FIXME(sgm): for simplicity, we assign the special token here
35 |     @property
36 |     def pad_token_id(self):
37 |         return self.tokenizer.pad_token_id
38 | 
39 |     @property
40 |     def eos_token_id(self):
41 |         return self.tokenizer.eos_token_id
42 | 


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
 3 |   prompt_key: prompt
 4 |   response_key: responses
 5 |   data_source_key: data_source
 6 |   reward_model_key: reward_model
 7 | 
 8 | custom_reward_function:
 9 |   path: null
10 |   name: compute_score
11 | 
12 | ray_init:
13 |   num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
 1 | trainer:
 2 |   nnodes: 1
 3 |   n_gpus_per_node: 8
 4 | 
 5 | data:
 6 |   path: ~/data/rlhf/math/test.parquet
 7 |   prompt_key: prompt
 8 |   n_samples: 5
 9 |   output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
10 |   batch_size: 128
11 | 
12 | model:
13 |   path: ~/models/Qwen2-7B-Instruct
14 |   external_lib: null
15 | rollout:
16 |   name: vllm
17 |   temperature: 1.0
18 |   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
19 |   top_p: 0.7
20 |   prompt_length: 1536
21 |   response_length: 512
22 |   # for vllm rollout
23 |   dtype: bfloat16 # should align with FSDP
24 |   gpu_memory_utilization: 0.5
25 |   ignore_eos: False
26 |   enforce_eager: True
27 |   free_cache_engine: True
28 |   load_format: dummy_dtensor
29 |   tensor_model_parallel_size: 1
30 |   max_num_batched_tokens: 8192
31 |   max_model_len: null
32 |   max_num_seqs: 1024
33 |   log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
34 |   log_prob_micro_batch_size_per_gpu: 8
35 |   # for fire vllm rollout
36 |   use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236
37 |   # for hf rollout
38 |   do_sample: True
39 |   disable_log_stats: True
40 |   enable_chunked_prefill: True
41 |   n: 1
42 | actor:
43 |   strategy: fsdp  # This is for backward-compatibility
44 |   ulysses_sequence_parallel_size: 1 # sp size
45 |   fsdp_config:
46 |     fsdp_size: -1
47 | 
48 | ray_init:
49 |   num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.
50 | 


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   train_batch_size: 256
 3 |   micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
 4 |   micro_batch_size_per_gpu: 4  # this is also val batch size
 5 |   train_files: ~/data/gsm8k/train.parquet
 6 |   val_files: ~/data/gsm8k/test.parquet
 7 |   # Single-turn settings
 8 |   prompt_key: question
 9 |   response_key: answer
10 |   prompt_dict_keys: ['question']
11 |   response_dict_keys: ['answer']
12 |   # Multi-turn settings
13 |   multiturn:
14 |     enable: false  # Set to true to use multi-turn dataset
15 |     messages_key: messages  # Key for messages list in multi-turn mode
16 |   max_length: 1024
17 |   truncation: error
18 |   balance_dp_token: False
19 |   chat_template: null
20 |   custom_cls:
21 |     path: null
22 |     name: null
23 | model:
24 |   partial_pretrain: ~/models/gemma-1.1-7b-it
25 |   fsdp_config:
26 |     wrap_policy:
27 |       min_num_params: 0
28 |     cpu_offload: False
29 |     offload_params: False
30 |   external_lib: null
31 |   enable_gradient_checkpointing: False
32 |   trust_remote_code: False
33 |   lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
34 |   lora_alpha: 16  # LoRA scaling factor
35 |   target_modules: all-linear  # Target modules for LoRA adaptation
36 |   use_liger: False
37 | optim:
38 |   lr: 1e-5
39 |   betas: [0.9, 0.95]
40 |   weight_decay: 0.01
41 |   warmup_steps_ratio: 0.1
42 |   clip_grad: 1.0
43 |   lr_scheduler: cosine
44 | ulysses_sequence_parallel_size: 1
45 | use_remove_padding: False
46 | trainer:
47 |   default_local_dir: /tmp/sft_model
48 |   default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here
49 |   resume_path: null
50 |   project_name: gsm8k-sft
51 |   experiment_name: test
52 |   total_epochs: 4
53 |   total_training_steps: null
54 |   logger: ['console']
55 |   seed: 1
56 | 
57 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./
2 | excludes: ["/.git/"]
3 | env_vars:
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 |   # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
6 |   # VLLM_ATTENTION_BACKEND: "XFORMERS"


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import tokenizer
16 | from .tokenizer import hf_processor, hf_tokenizer
17 | 
18 | __all__ = tokenizer.__all__
19 | 


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from omegaconf import DictConfig
18 | 
19 | 
20 | def update_dict_with_config(dictionary: Dict, config: DictConfig):
21 |     for key in dictionary:
22 |         if hasattr(config, key):
23 |             dictionary[key] = getattr(config, key)
24 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet files. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction following texts to guide the model output the answers in a particular format so that we can extract the answers.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     },
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .performance import GPUMemoryLogger, log_gpu_memory_usage
16 | 


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for distributed training."""
15 | 
16 | import os
17 | 
18 | 
19 | def initialize_global_process_group(timeout_second=36000):
20 |     from datetime import timedelta
21 | 
22 |     import torch.distributed
23 | 
24 |     torch.distributed.init_process_group("nccl", timeout=timedelta(seconds=timeout_second))
25 |     local_rank = int(os.environ["LOCAL_RANK"])
26 |     rank = int(os.environ["RANK"])
27 |     world_size = int(os.environ["WORLD_SIZE"])
28 | 
29 |     if torch.distributed.is_initialized():
30 |         torch.cuda.set_device(local_rank)
31 |     return local_rank, rank, world_size
32 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import os
17 | 
18 | import torch
19 | 
20 | 
21 | def set_basic_config(level):
22 |     """
23 |     This function sets the global logging format and level. It will be called when import verl
24 |     """
25 |     logging.basicConfig(format="%(levelname)s:%(asctime)s:%(message)s", level=level)
26 | 
27 | 
28 | def log_to_file(string):
29 |     print(string)
30 |     if os.path.isdir("logs"):
31 |         with open(f"logs/log_{torch.distributed.get_rank()}", "a+") as f:
32 |             f.write(string + "\n")
33 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | 
18 | class MemoryBuffer:
19 |     def __init__(self, numel, numel_padded, dtype):
20 |         self.numel = numel
21 |         self.numel_padded = numel_padded
22 |         self.dtype = dtype
23 |         self.data = torch.zeros(
24 |             self.numel_padded, dtype=self.dtype, device=torch.cuda.current_device(), requires_grad=False
25 |         )
26 | 
27 |     def zero(self):
28 |         """Reset the buffer to zero."""
29 |         self.data.zero_()
30 | 
31 |     def get(self, shape, start_index):
32 |         """Return a tensor with the input `shape` as a view into the
33 |         1-D data starting at `start_index`."""
34 |         end_index = start_index + shape.numel()
35 |         assert end_index <= self.numel, "requested tensor is out of the buffer range."
36 |         buffer_tensor = self.data[start_index:end_index]
37 |         buffer_tensor = buffer_tensor.view(shape)
38 |         return buffer_tensor
39 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/optimizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from megatron.core.optimizer import OptimizerConfig
17 | from megatron.core.optimizer import get_megatron_optimizer as get_megatron_optimizer_native
18 | 
19 | 
20 | def get_megatron_optimizer(
21 |     model,
22 |     config: OptimizerConfig,
23 |     no_weight_decay_cond=None,
24 |     scale_lr_cond=None,
25 |     lr_mult=1.0,
26 | ):
27 |     # Base optimizer.
28 |     return get_megatron_optimizer_native(
29 |         config=config,
30 |         model_chunks=model,
31 |         no_weight_decay_cond=no_weight_decay_cond,
32 |         scale_lr_cond=scale_lr_cond,
33 |         lr_mult=lr_mult,
34 |     )
35 | 
36 | 
37 | # TODO: add get_optimizer_param_scheduler(optimizer) to implement lr scheuler.
38 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/sequence_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | import torch.nn.functional as F
18 | from megatron.core import parallel_state as mpu
19 | 
20 | 
21 | def mark_parameter_as_sequence_parallel(parameter):
22 |     parameter.sequence_parallel = True
23 | 
24 | 
25 | def is_sequence_parallel_param(param):
26 |     return hasattr(param, "sequence_parallel") and param.sequence_parallel
27 | 
28 | 
29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
30 |     """pad the tokens such that the total length is a multiple of sp world size
31 | 
32 |     Args:
33 |         unpad_tokens: (total_nnz, ...). Tokens after removing padding
34 | 
35 |     Returns:
36 | 
37 |     """
38 |     total_nnz = unpad_tokens.shape[0]
39 |     sp_world_size = mpu.get_tensor_model_parallel_world_size()
40 | 
41 |     pad_size = 0 if total_nnz % sp_world_size == 0 else sp_world_size - total_nnz % sp_world_size
42 | 
43 |     if pad_size > 0:
44 |         if unpad_tokens.ndim == 1:
45 |             unpad_tokens = F.pad(unpad_tokens, (0, pad_size))
46 |         elif unpad_tokens.ndim == 2:
47 |             unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size))
48 |         else:
49 |             raise NotImplementedError(f"Padding dim {unpad_tokens.ndim()} is not supported")
50 | 
51 |     return unpad_tokens
52 | 


--------------------------------------------------------------------------------
/verl/utils/py_functional.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contain small python utility functions
16 | """
17 | 
18 | from types import SimpleNamespace
19 | from typing import Dict
20 | 
21 | 
22 | def union_two_dict(dict1: Dict, dict2: Dict):
23 |     """Union two dict. Will throw an error if there is an item not the same object with the same key.
24 | 
25 |     Args:
26 |         dict1:
27 |         dict2:
28 | 
29 |     Returns:
30 | 
31 |     """
32 |     for key, val in dict2.items():
33 |         if key in dict1:
34 |             assert dict2[key] == dict1[key], f"{key} in meta_dict1 and meta_dict2 are not the same object"
35 |         dict1[key] = val
36 | 
37 |     return dict1
38 | 
39 | 
40 | def append_to_dict(data: Dict, new_data: Dict):
41 |     for key, val in new_data.items():
42 |         if key not in data:
43 |             data[key] = []
44 |         data[key].append(val)
45 | 
46 | 
47 | class NestedNamespace(SimpleNamespace):
48 |     def __init__(self, dictionary, **kwargs):
49 |         super().__init__(**kwargs)
50 |         for key, value in dictionary.items():
51 |             if isinstance(value, dict):
52 |                 self.__setattr__(key, NestedNamespace(value))
53 |             else:
54 |                 self.__setattr__(key, value)
55 | 


--------------------------------------------------------------------------------
/verl/utils/ray_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contains commonly used utilities for ray
16 | """
17 | 
18 | import concurrent.futures
19 | 
20 | import ray
21 | 
22 | 
23 | def parallel_put(data_list, max_workers=None):
24 |     def put_data(index, data):
25 |         return index, ray.put(data)
26 | 
27 |     if max_workers is None:
28 |         max_workers = min(len(data_list), 16)
29 | 
30 |     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
31 |         data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)]
32 |         res_lst = []
33 |         for future in concurrent.futures.as_completed(data_list_f):
34 |             res_lst.append(future.result())
35 | 
36 |         # reorder based on index
37 |         output = [None for _ in range(len(data_list))]
38 |         for res in res_lst:
39 |             index, data_ref = res
40 |             output[index] = data_ref
41 | 
42 |     return output
43 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/geo3k.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import re
16 | 
17 | from mathruler.grader import extract_boxed_content, grade_answer
18 | 
19 | 
20 | def format_reward(predict_str: str) -> float:
21 |     pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)
22 |     match_result = re.fullmatch(pattern, predict_str)
23 |     return 1.0 if match_result else 0.0
24 | 
25 | 
26 | def acc_reward(predict_str: str, ground_truth: str) -> float:
27 |     answer = extract_boxed_content(predict_str)
28 |     return 1.0 if grade_answer(answer, ground_truth) else 0.0
29 | 
30 | 
31 | def compute_score(predict_str: str, ground_truth: str) -> float:
32 |     return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str)
33 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/math_batch.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Individual Contributor: Mert Unsal
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .math import compute_score
16 | 
17 | 
18 | def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
19 |     """
20 |     This is a demonstration of how the batched reward function should look like.
21 |     Typically, you want to use batched reward to speed up the process with parallelization
22 |     """
23 |     return [
24 |         compute_score(solution_str, ground_truth) for solution_str, ground_truth in zip(solution_strs, ground_truths)
25 |     ]
26 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/math_verify.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | try:
16 |     from math_verify.errors import TimeoutException
17 |     from math_verify.metric import math_metric
18 |     from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig
19 | except ImportError:
20 |     print("To use Math-Verify, please install it first by running `pip install math-verify`.")
21 | 
22 | 
23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> bool:
24 |     verify_func = math_metric(
25 |         gold_extraction_target=(LatexExtractionConfig(),),
26 |         pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
27 |     )
28 |     ret_score = 0.0
29 | 
30 |     # Wrap the ground truth in \boxed{} format for verification
31 |     ground_truth_boxed = "\\boxed{" + ground_truth + "}"
32 |     try:
33 |         ret_score, _ = verify_func([ground_truth_boxed], [model_output])
34 |     except Exception:
35 |         pass
36 |     except TimeoutException:
37 |         ret_score = timeout_score
38 | 
39 |     return ret_score
40 | 


--------------------------------------------------------------------------------
/verl/utils/tensorboard_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from torch.utils.tensorboard import SummaryWriter
 3 | from verl import DataProto
 4 | 
 5 | 
 6 | class TensorboardLogger:
 7 |     
 8 |     def __init__(
 9 |         self,
10 |         root_log_dir: str,
11 |         project_name: str,
12 |         experiment_name: str
13 |     ):
14 |         self.writer = SummaryWriter(
15 |             log_dir=os.path.join(
16 |                 root_log_dir, 
17 |                 project_name, 
18 |                 experiment_name
19 |             )
20 |         )
21 | 
22 |     def log(
23 |         self,
24 |         data: dict,
25 |         step: int,
26 |         *args,
27 |         **kwargs
28 |     ):
29 |         for k, v in data.items():
30 |             try:
31 |                 self.writer.add_scalar(k, v, step)
32 |             except:
33 |                 print("[TensorboardLogger] Failed to log key:", k, ", skipped.")


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.2.0.dev
2 | 


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/agent/__init__.py:
--------------------------------------------------------------------------------
 1 | # NOTE: Env must be imported here in order to trigger metaclass registering
 2 | from .envs.rag_engine.rag_engine import RAGEngineEnv
 3 | from .envs.rag_engine.rag_engine_v2 import RAGEngineEnvV2
 4 | from .envs.visual_agent.vl_agent_v1 import VLAgentEnvV1
 5 | from .envs.visual_agent.vl_agent_v2 import VLAgentEnvV2
 6 | from .envs.mm_process_engine.visual_toolbox import VisualToolBox
 7 | from .envs.mm_process_engine.visual_toolbox_v2 import VisualToolBoxV2
 8 | from .envs.mm_process_engine.visual_toolbox_v3 import VisualToolBoxV3
 9 | from .envs.mm_process_engine.visual_toolbox_v4 import VisualToolBoxV4
10 | from .envs.mm_process_engine.visual_toolbox_v5 import VisualToolBoxV5
11 | from .envs.visual_agent.vl_agent_v2 import VLAgentEnvV2
12 | from .envs.visual_agent.vl_agent_v3 import VLAgentEnvV3
13 | 
14 | try:
15 |     from .envs.visual_agent.mm_search_engine import MMSearchEngine
16 | except Exception as err:
17 |     print(f' [ERROR] Failed to register MMSearchEngine : {err=}')
18 | 
19 | try:
20 |     from .envs.frozenlake.frozenlake import FrozenLakeTool
21 | except Exception as err:
22 |     print(f' [ERROR] Failed to register FrozenLakeTool : {err=}')
23 | 
24 | from .parallel_env import agent_rollout_loop
25 | 


--------------------------------------------------------------------------------
/verl/workers/agent/envs/ENV_README.md:
--------------------------------------------------------------------------------
 1 | # FrozenLake
 2 | 1. An example for create frozenlake dataset
 3 | 
 4 |     create text type dataset: 
 5 |     ```bash
 6 |     export SIZE=8
 7 |     export P=0.8
 8 |     python verl/workers/agent/envs/frozenlake/create_dataset.py \
 9 |         --env frozenlake \
10 |         --seed 1000 \
11 |         --output data/frozenlake \
12 |         --train_size 3000 \
13 |         --test_size 100 \
14 |         --prefix qwen-instruct
15 |     ```
16 | 
17 |     create multi-model type dataset:
18 |     ```bash
19 |     export SIZE=8
20 |     export P=0.8
21 |     python verl/workers/agent/envs/frozenlake/create_dataset.py \
22 |         --env frozenlake \
23 |         --seed 1000 \
24 |         --output data/frozenlake \
25 |         --train_size 3000 \
26 |         --test_size 100 \
27 |         --prefix qwen-instruct \
28 |         --use_mm
29 |     ```
30 | # Search(RAG)
31 | 


--------------------------------------------------------------------------------
/verl/workers/agent/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/__init__.py


--------------------------------------------------------------------------------
/verl/workers/agent/envs/frozenlake/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/frozenlake/__init__.py


--------------------------------------------------------------------------------
/verl/workers/agent/envs/mm_process_engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/mm_process_engine/__init__.py


--------------------------------------------------------------------------------
/verl/workers/agent/envs/rag_engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/rag_engine/__init__.py


--------------------------------------------------------------------------------
/verl/workers/agent/envs/rag_engine/test_client.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import sys
 3 | from pprint import pprint
 4 | 
 5 | port = str(sys.argv[1]) if len(sys.argv) >= 2 else '25004'
 6 | query = str(sys.argv[2]) if len(sys.argv) >= 3 else "Apple"
 7 | 
 8 | url_wiki = f"http://127.0.0.1:{port}/queries"
 9 | query_list = [query]
10 | topk = 3
11 | response = requests.post(url_wiki, json={"queries": query_list, "k": topk})
12 | if response.status_code == 200:
13 |     for results in response.json()['answers']:
14 |         for i, res in enumerate(results):
15 |             print(f"[{i}] {res}")
16 | else:
17 |     print(response.text)
18 | 


--------------------------------------------------------------------------------
/verl/workers/agent/envs/sokoban/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/sokoban/__init__.py


--------------------------------------------------------------------------------
/verl/workers/agent/envs/visual_agent/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Visual-Agent/DeepEyes/561293def6dc71fa7ac8b5bc674c070c393c9d94/verl/workers/agent/envs/visual_agent/__init__.py


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | import torch
21 | 
22 | from verl import DataProto
23 | 
24 | __all__ = ["BasePPOCritic"]
25 | 
26 | 
27 | class BasePPOCritic(ABC):
28 |     def __init__(self, config):
29 |         super().__init__()
30 |         self.config = config
31 | 
32 |     @abstractmethod
33 |     def compute_values(self, data: DataProto) -> torch.Tensor:
34 |         """Compute values"""
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def update_critic(self, data: DataProto):
39 |         """Update the critic"""
40 |         pass
41 | 


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 PRIME team and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .batch import BatchRewardManager
16 | from .dapo import DAPORewardManager
17 | from .naive import NaiveRewardManager
18 | from .prime import PrimeRewardManager
19 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for reward model
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | from verl import DataProto
21 | 
22 | 
23 | class BasePPORewardModel(ABC):
24 |     def __init__(self, config):
25 |         self.config = config
26 | 
27 |     @abstractmethod
28 |     def compute_reward(self, data: DataProto) -> DataProto:
29 |         """Computing reward given input_ids. The transformers should output a tensor with shape
30 |            [batch_size, sequence_length], and the value at [EOS] mask should be gathered.
31 | 
32 |         Args:
33 |             data: must contain keys "input_ids", "attention_mask" and "position_ids".
34 |                 - input_ids: [batch_size, sequence_length]
35 |                 - attention_mask: [batch_size, sequence_length]
36 |                 - position_ids: [batch_size, sequence_length]
37 | 
38 |         Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
39 |             Other position should have zero reward. Note that this may change in the future if we use
40 |             dense reward. So, we leave the interface for general case.
41 |             - reward: [batch_size, sequence_length].
42 | 
43 |         """
44 |         pass
45 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .hf_rollout import HFRollout
17 | from .naive import NaiveRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | 
17 | from verl import DataProto
18 | 
19 | __all__ = ["BaseRollout"]
20 | 
21 | 
22 | class BaseRollout(ABC):
23 |     def __init__(self):
24 |         """
25 | 
26 |         Args:
27 |             dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader
28 |             should handle when the training stops.
29 |         """
30 |         super().__init__()
31 | 
32 |     @abstractmethod
33 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
34 |         """Generate sequences"""
35 |         pass
36 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/sglang_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | 
14 | from .sglang_rollout import SGLangRollout
15 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from importlib.metadata import PackageNotFoundError, version
16 | 
17 | ###
18 | # [SUPPORT AMD:]
19 | import torch
20 | 
21 | ###
22 | 
23 | 
24 | def get_version(pkg):
25 |     try:
26 |         return version(pkg)
27 |     except PackageNotFoundError:
28 |         return None
29 | 
30 | 
31 | package_name = "vllm"
32 | package_version = get_version(package_name)
33 | 
34 | ###
35 | # package_version = get_version(package_name)
36 | # [SUPPORT AMD:]
37 | if "AMD" in torch.cuda.get_device_name():
38 |     import re
39 | 
40 |     package_version = version(package_name)
41 |     package_version = re.match(r"(\d+\.\d+\.?\d*)", package_version).group(1)
42 | else:
43 |     package_version = get_version(package_name)
44 | ###
45 | 
46 | if package_version <= "0.6.3":
47 |     vllm_mode = "customized"
48 |     from .fire_vllm_rollout import FIREvLLMRollout
49 |     from .vllm_rollout import vLLMRollout
50 | else:
51 |     vllm_mode = "spmd"
52 |     from .vllm_rollout_spmd import vLLMRollout
53 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from verl.utils.import_utils import (
16 |     is_megatron_core_available,
17 |     is_sglang_available,
18 |     is_vllm_available,
19 | )
20 | 
21 | from .base import BaseShardingManager
22 | from .fsdp_ulysses import FSDPUlyssesShardingManager
23 | 
24 | if is_vllm_available():
25 |     from .fsdp_vllm import FSDPVLLMShardingManager
26 | else:
27 |     FSDPVLLMShardingManager = None
28 | 
29 | # NOTE(linjunrong): Due to recent fp8 support in SGLang. Now importing any symbol relate to SGLang's model_runner would check CUDA device capability.
30 | # However, due to veRL's setting, the main process of ray can not find any CUDA device, which would potentially lead to:
31 | # "RuntimeError: No CUDA GPUs are available".
32 | # For this reason, sharding_manager.__init__ should not import SGLangShardingManager and user need to import use the abs path.
33 | # check: https://github.com/sgl-project/sglang/blob/00f42707eaddfc2c0528e5b1e0094025c640b7a0/python/sglang/srt/layers/quantization/fp8_utils.py#L76
34 | # if is_sglang_available():
35 | #     from .fsdp.fsdp_sglang import FSDPSGLangShardingManager
36 | # else:
37 | #     FSDPSGLangShardingManager = None
38 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 |     def __enter__(self):
23 |         pass
24 | 
25 |     def __exit__(self, exc_type, exc_value, traceback):
26 |         pass
27 | 
28 |     def preprocess_data(self, data: DataProto) -> DataProto:
29 |         return data
30 | 
31 |     def postprocess_data(self, data: DataProto) -> DataProto:
32 |         return data
33 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/patch/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .fsdp_vllm_patch import patched_ds_v3_load_weights, patched_qwen_moe_load_weights
16 | 


--------------------------------------------------------------------------------