├── .gitignore ├── LICENSE ├── README.md ├── assets └── rl-factory.png ├── docker ├── Apptainerfile.rocm ├── Dockerfile.megatron ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang ├── Dockerfile.vemlp.vllm.te ├── Dockerfile.vllm.sglang.megatron └── Dockfile.ngc.vllm0.8 ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ ├── js │ │ └── runllm-widget.js │ └── logo.png ├── advance │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ ├── placement.rst │ └── rope.rst ├── amd_tutorial │ ├── amd_build_dockerfile.md │ ├── amd_build_dockerfile_page.rst │ ├── amd_existing_docker.md │ └── amd_vllm_page.rst ├── api │ ├── trainer.rst │ └── utils.rst ├── conf.py ├── data.rst ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ ├── multi_modal_example.rst │ ├── ppo_code_architecture.rst │ └── sandbox_fusion_example.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ ├── device_tuning.rst │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── rl_factory │ ├── en │ │ ├── centralized_tool_manager.md │ │ ├── framework_design.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md │ ├── main_tutorial.md │ ├── main_tutorial_zh.md │ └── zh │ │ ├── README.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md ├── sglang_multiturn │ └── multiturn.rst ├── start │ ├── install.rst │ ├── multinode.rst │ ├── quickstart.rst │ └── ray_debug_tutorial.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ ├── ray_trainer.rst │ └── sglang_worker.rst ├── envs ├── __init__.py ├── base.py ├── configs │ ├── calculator.json │ ├── mcp_tools.pydata │ └── sse_mcp_tools.pydata ├── reward_rollout_example.py ├── search.py ├── tool_manager │ ├── __init__.py │ ├── base_manager.py │ ├── config_manager.py │ ├── qwen2_5_manager.py │ └── qwen3_manager.py ├── tools │ └── search.py └── utils │ ├── get_prompt.py │ ├── mcp_manager.py │ ├── schema.py │ ├── tool_utils.py │ └── util.py ├── examples ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ └── run_qwen_megatron_ckpt.sh ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── gsm8k_multiturn_w_tool.py │ ├── hellaswag.py │ ├── math_dataset.py │ └── multiturn.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sgl_megatron.sh │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ ├── run_qwen2_5_vl-7b.sh │ └── run_qwen3-8b.sh ├── ppo_trainer │ ├── naive_chat_scheduler.py │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── 
run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── rl_factory │ └── reward_rollout_test.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ └── multiturn │ │ └── run_qwen_05_sp2.sh ├── sglang_multiturn │ ├── README.md │ ├── config │ │ ├── gsm8k_multiturn_grpo.yaml │ │ └── tool_config │ │ │ └── gsm8k_tool_config.yaml │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ └── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh ├── slurm │ └── ray_on_slurm.slurm ├── split_placement │ ├── README.md │ ├── config │ │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py └── tuning │ ├── 14b │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ ├── 32b │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ ├── 70b │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ └── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ └── 7b │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── generator ├── __init__.py ├── api_generator.py └── base_generator.py ├── install.sh ├── main_grpo.sh ├── main_ppo.sh ├── pyproject.toml ├── rag_server ├── README.md ├── data_process │ └── nq_search.py ├── download.py ├── launch.sh └── retrieval_server.py ├── recipe ├── dapo │ ├── README.md │ ├── config │ │ └── dapo_trainer.yaml │ ├── dapo_ray_trainer.py │ ├── main_dapo.py │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ └── test_dapo_7b.sh ├── drgrpo │ └── README.md ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ └── run_prime_qwen.sh ├── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ │ ├── __init__.py │ │ ├── gpqa.py │ │ ├── livecodebench.py │ │ └── math.py └── sppo │ ├── README.md │ ├── __init__.py │ ├── config │ └── sppo_trainer.yaml │ ├── dp_actor.py │ ├── main_sppo.py │ ├── run_qwen2.5-7b_rm.sh │ ├── sppo_ray_trainer.py │ └── sppo_worker.py ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── converter_hf_to_mcore.py ├── diagnose.py ├── format.sh ├── install_nginx.sh ├── install_vllm_sglang_mcore.sh ├── model_merger.py ├── nq_search.py ├── run_vllm_with_nginx.sh └── vllm_server.sh ├── setup.py ├── tests ├── __init__.py ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ ├── run_qwen_megatron_ckpt.sh │ └── test_fsdp_ckpt.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── distro │ └── requirements.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── generation │ │ └── run_gen_qwen05.sh │ ├── 
ppo_trainer │ │ ├── run_function_reward.sh │ │ └── run_model_reward.sh │ ├── run_dapo.sh │ ├── run_deepseek_grpo.sh │ ├── run_deepseek_grpo_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_deepseek_megatron_parallelism.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_ppo_trainer_megatron.sh │ ├── run_prime.sh │ ├── run_qwen2vl_geo3k_function_rm.sh │ ├── run_qwen_grpo.sh │ ├── run_qwen_grpo_megatron.sh │ ├── run_qwen_gsm8k_custom_function_rm.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_gsm8k_prime.sh │ ├── run_qwen_megatron.sh │ ├── run_qwen_megatron_parallelism.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ ├── run_ray_trainer_rmpad.sh │ ├── run_sppo.sh │ ├── run_test.sh │ └── sft │ │ ├── run_sft.sh │ │ └── test_sp_loss_match.py ├── generation │ └── run_gen_qwen05.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kernels │ └── test_linear_cross_entropy.py ├── kill_github_tests.sh ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── models │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── ray_cpu │ ├── check_worker_alive │ │ └── main.py │ ├── test_auto_padding.py │ ├── test_check_worker_alive.py │ ├── test_decorator.py │ ├── test_fused_workers.py │ └── test_ray_local_envs.py ├── ray_gpu │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_colocated_workers.py │ ├── test_colocated_workers_fused.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── reward_score │ └── test_sandbox_fusion.py ├── rl_factory │ ├── envs │ │ └── test_tool_use.py │ ├── generator │ │ ├── async_generator_test.py │ │ ├── async_results.csv │ │ └── test.sh │ ├── rewarder │ │ └── test_parallel.py │ └── test_qwen3_manager.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_qwen05_peft.sh │ ├── run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── single_controller │ └── base │ │ └── test_decorator.py ├── test_protocol.py ├── trainer │ ├── __init__.py │ └── ppo │ │ ├── __init__.py │ │ └── test_metric_utils.py ├── utility │ └── test_tensor_dict_utilities.py ├── utils │ ├── cpu_tests │ │ ├── test_fs.py │ │ ├── test_import_utils.py │ │ ├── test_model.py │ │ ├── test_module.py │ │ └── test_timeout_decorator.py │ └── gpu_tests │ │ ├── checkpoint │ │ └── test_fsdp_ckpt.py │ │ ├── dataset │ │ ├── 
test_multiturn_sft_dataset.py │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py │ │ ├── test_flops_counter.py │ │ ├── test_seqlen_balancing.py │ │ └── test_torch_functional.py ├── verl │ └── utils │ │ └── dataset │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py └── workers │ └── rollout │ ├── async_rollout_utils.py │ ├── run_fsdp_vllm.py │ ├── test_hf_rollout.py │ ├── test_sglang_async_rollout_w_tools.py │ ├── test_sglang_async_spmd.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ ├── test_vllm_multi_turn.py │ ├── test_vllm_spmd.py │ ├── test_vllm_tool_calling.py │ └── utils_sglang.py ├── verl ├── __init__.py ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── model_forward.py │ │ ├── model_initializer.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py ├── protocol.py ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py ├── third_party │ ├── __init__.py │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ └── vllm │ │ ├── __init__.py │ │ ├── vllm_v_0_3_1 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── tokenizer.py │ │ ├── weight_loaders.py │ │ └── worker.py │ │ ├── vllm_v_0_4_2 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ ├── vllm_v_0_5_4 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ └── vllm_v_0_6_3 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── 
config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── gsm8k_tool.py │ └── schemas.py ├── trainer │ ├── __init__.py │ ├── config │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ └── sft_trainer.yaml │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── runtime_env.yaml ├── utils │ ├── __init__.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── multiturn_sft_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── performance.py │ │ ├── profile.py │ │ └── trajectory_tracker.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── metric │ │ ├── __init__.py │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ ├── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── search.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── ulysses.py │ ├── vllm_request.py │ └── vllm_utils.py ├── version │ └── version └── workers │ ├── __init__.py │ ├── actor │ ├── __init__.py │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── batch.py │ ├── dapo.py │ ├── naive.py │ ├── parallel.py │ └── prime.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── async_server.py │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_rollout.py │ │ └── sglang_rollout.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── fire_vllm_rollout.py │ │ ├── vllm_async_server.py │ │ ├── vllm_rollout.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── 
fsdp_vllm_reward.py │ ├── megatron_sglang.py │ └── megatron_vllm.py ├── webui ├── README.md ├── app.py ├── components │ └── rewards │ │ └── graders │ │ ├── __init__.py │ │ ├── base.py │ │ ├── graders.py │ │ └── qwen_math.py ├── requirements.txt ├── run_webui.sh └── tabs │ ├── __init__.py │ ├── data_processing.py │ ├── project_management.py │ ├── reward_definition.py │ ├── tool_definition.py │ └── training_deployment.py └── workspace └── tools └── code_interpreter ├── kernel_connection_file_11d607b7-be32-4947-9087-88f808616b56_30594.json ├── kernel_connection_file_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.json ├── kernel_connection_file_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.json ├── kernel_connection_file_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.json ├── kernel_connection_file_67ad306a-e335-4294-b241-514085b015a3_6550.json ├── kernel_connection_file_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.json ├── kernel_connection_file_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.json ├── kernel_connection_file_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.json ├── launch_kernel_11d607b7-be32-4947-9087-88f808616b56_30594.py ├── launch_kernel_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.py ├── launch_kernel_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.py ├── launch_kernel_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.py ├── launch_kernel_67ad306a-e335-4294-b241-514085b015a3_6550.py ├── launch_kernel_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.py ├── launch_kernel_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.py └── launch_kernel_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.py /assets/rl-factory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/assets/rl-factory.png -------------------------------------------------------------------------------- /docker/Apptainerfile.rocm: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | 3 | # Support - Traing: fsdp; Inference: vllm 4 | # FROM: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 5 | # Support - Traing: fsdp; Inference: vllm, sglang 6 | FROM lmsysorg/sglang:v0.4.5-rocm630 7 | 8 | %environment 9 | export PYTORCH_ROCM_ARCH="gfx90a;gfx942" 10 | 11 | export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__" 12 | export CFLAGS="-D__HIP_PLATFORM_AMD__" 13 | export CXXFLAGS="-D__HIP_PLATFORM_AMD__" 14 | 15 | %post 16 | # Create source directory 17 | mkdir -p /opt/src 18 | 19 | # Uninstall and reinstall vllm 20 | pip uninstall -y vllm 21 | cd /opt/src 22 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git 23 | cd vllm 24 | MAX_JOBS=$(nproc) python3 setup.py install 25 | cd /opt 26 | rm -rf /opt/src/vllm 27 | 28 | # Install dependencies 29 | pip install "tensordict<0.6" --no-deps 30 | pip install accelerate \ 31 | codetiming \ 32 | datasets \ 33 | dill \ 34 | hydra-core \ 35 | liger-kernel \ 36 | numpy \ 37 | pandas \ 38 | peft \ 39 | "pyarrow>=15.0.0" \ 40 | pylatexenc \ 41 | "ray[data,train,tune,serve]" \ 42 | torchdata \ 43 | transformers \ 44 | wandb \ 45 | orjson \ 46 | pybind11 47 | 48 | # Clone and install verl from GitHub 49 | cd /opt 50 | git clone https://github.com/volcengine/verl.git 51 | cd verl 52 | # Uncomment to use a specific version 53 | # git checkout v0.3.0.post0 54 | pip install -e . 
--no-deps 55 | 56 | # Install torch_memory_saver 57 | pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps -------------------------------------------------------------------------------- /docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | # Support - Traing: fsdp; Inference: vllm 7 | # FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 8 | # Support - Traing: fsdp; Inference: vllm, sglang 9 | FROM lmsysorg/sglang:v0.4.6.post1-rocm630 10 | 11 | # Set working directory 12 | # WORKDIR $PWD/app 13 | 14 | # Set environment variables 15 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 16 | 17 | ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__" 18 | ENV CFLAGS="-D__HIP_PLATFORM_AMD__" 19 | ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__" 20 | 21 | # Install vllm 22 | RUN pip uninstall -y vllm && \ 23 | rm -rf vllm && \ 24 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 25 | cd vllm && \ 26 | MAX_JOBS=$(nproc) python3 setup.py install && \ 27 | cd .. && \ 28 | rm -rf vllm 29 | 30 | # Copy the entire project directory 31 | COPY . . 32 | 33 | # Install dependencies 34 | RUN pip install "tensordict<0.6" --no-deps && \ 35 | pip install accelerate \ 36 | codetiming \ 37 | datasets \ 38 | dill \ 39 | hydra-core \ 40 | liger-kernel \ 41 | numpy \ 42 | pandas \ 43 | peft \ 44 | "pyarrow>=15.0.0" \ 45 | pylatexenc \ 46 | "ray[data,train,tune,serve]>=2.45.0" \ 47 | torchdata \ 48 | transformers \ 49 | wandb \ 50 | orjson \ 51 | pybind11 && \ 52 | pip install -e . --no-deps 53 | 54 | # Install torch_memory_saver 55 | RUN pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE . 
2 | 3 | # the one in docker.io is an alias for the one veturbo 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN pip3 config unset global.index-url 10 | 11 | # transformers 4.47.0 contains the following bug: 12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask' 13 | RUN pip3 install --no-cache-dir \ 14 | torch==2.4.0 \ 15 | accelerate \ 16 | codetiming \ 17 | dill \ 18 | hydra-core \ 19 | numpy \ 20 | pybind11 \ 21 | tensordict \ 22 | "transformers <= 4.46.0" 23 | 24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation 25 | 26 | # vllm depends on ray 27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10 28 | 29 | # install apex 30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 31 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 32 | git+https://github.com/NVIDIA/apex 33 | 34 | # install Transformer Engine 35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req 36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM 37 | # - cudnn is required by TransformerEngine 38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \ 39 | # pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation 41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and navigate to http://localhost:8000 to view the documentation. 
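If you are editing the docs repeatedly, a live-rebuild loop can replace the manual `make html` + `http.server` cycle. This is a minimal sketch assuming the optional `sphinx-autobuild` package (not part of `requirements-docs.txt`) is installed:

```bash
# Rebuild the HTML and refresh the browser automatically whenever a source file changes.
pip install sphinx-autobuild
sphinx-autobuild . _build/html --port 8000
```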
-------------------------------------------------------------------------------- /docs/README_vllm0.8.md: -------------------------------------------------------------------------------- 1 | # Upgrading to vLLM >= 0.8 2 | 3 | ## Installation 4 | 5 | Note: This version of verl+vLLM 0.8+ supports **FSDP** for training and **vLLM** for rollout. 6 | 7 | ```bash 8 | # Create the conda environment 9 | conda create -n verl python==3.10 10 | conda activate verl 11 | 12 | # Install verl 13 | git clone https://github.com/volcengine/verl.git 14 | cd verl 15 | pip3 install -e . 16 | 17 | # Install the latest stable version of vLLM 18 | pip3 install vllm==0.8.3 19 | 20 | # Install flash-attn 21 | pip3 install flash-attn --no-build-isolation 22 | 23 | ``` 24 | 25 | We have a pre-built docker image for verl+vLLM 0.8.3. You can pull it directly with the following command: 26 | 27 | ```bash 28 | docker pull hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0 29 | ``` 30 | 31 | ## Features 32 | 33 | vLLM 0.8+ supports CUDA graphs and the V1 engine by default in verl. To enable these features, remember to add the following lines to the bash script: 34 | 35 | ```bash 36 | actor_rollout_ref.rollout.enforce_eager=False \ 37 | actor_rollout_ref.rollout.free_cache_engine=False \ 38 | ``` 39 | 40 | and also **remove** the environment variable if it exists: 41 | 42 | ```bash 43 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs: 44 | # export VLLM_ATTENTION_BACKEND=XFORMERS 45 | ``` 46 | 47 | ## Notes 48 | 49 | Upgrading directly to vllm>=0.8 may change the versions of some dependency packages. If you encounter the following problem: 50 | 51 | ```bash 52 | in from torch.multiprocessing.reductions import ForkingPickler ImportError: cannot import name 'ForkingPickler' from 'torch.multiprocessing.reductions' (/opt/conda/lib/python3.11/site-packages/torch/multiprocessing/reductions.py) 53 | ``` 54 | 55 | You need to upgrade `tensordict` to version 0.6.2 using the command `pip install tensordict==0.6.2`.
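The example run scripts in this repository forward extra arguments to the trainer via `$@`, so the vLLM 0.8 settings above can usually be appended as Hydra overrides without editing the script itself. A minimal sketch (the script path is just one example from this repo):

```bash
# Drop the legacy attention-backend override if it was exported for vllm<=0.6.3.
unset VLLM_ATTENTION_BACKEND

# Extra key=value overrides are passed through to the trainer via "$@".
bash examples/grpo_trainer/run_qwen2-7b.sh \
    actor_rollout_ref.rollout.enforce_eager=False \
    actor_rollout_ref.rollout.free_cache_engine=False
```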
56 | -------------------------------------------------------------------------------- /docs/_static/js/runllm-widget.js: -------------------------------------------------------------------------------- 1 | document.addEventListener("DOMContentLoaded", function () { 2 | var script = document.createElement("script"); 3 | script.type = "module"; 4 | script.id = "runllm-widget-script"; 5 | script.src = "https://widget.runllm.com"; 6 | script.setAttribute("version", "stable"); 7 | script.setAttribute("crossorigin", "true"); 8 | script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); 9 | script.setAttribute("runllm-name", "verl Chatbot"); 10 | script.setAttribute("runllm-position", "TOP_RIGHT"); 11 | script.setAttribute("runllm-assistant-id", "679"); 12 | script.async = true; 13 | document.head.appendChild(script); 14 | }); -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /docs/advance/rope.rst: -------------------------------------------------------------------------------- 1 | RoPE Scaling override 2 | ======================================= 3 | 4 | Some models such as `Qwen/Qwen2.5-7B-Instruct `_ support RoPE Scaling but don't have it defined in their config.json file. 5 | For example, this model supports this configuration: 6 | 7 | .. code:: python 8 | 9 | { 10 | ..., 11 | "rope_scaling": { 12 | "factor": 4.0, 13 | "original_max_position_embeddings": 32768, 14 | "type": "yarn" 15 | } 16 | } 17 | 18 | 19 | 20 | In order to support a longer context for such models, you must override the model configs when starting the trainer. 21 | 22 | PPO example: 23 | 24 | .. code:: bash 25 | 26 | +actor_rollout_ref.model.override_config.rope_scaling.type=yarn \ 27 | +actor_rollout_ref.model.override_config.rope_scaling.factor=4.0 \ 28 | +actor_rollout_ref.model.override_config.rope_scaling.original_max_position_embeddings=32768 \ 29 | 30 | 31 | And for the critic model 32 | 33 | .. code:: bash 34 | 35 | +critic.model.override_config.rope_scaling.type=yarn \ 36 | +critic.model.override_config.rope_scaling.factor=4.0 \ 37 | +critic.model.override_config.rope_scaling.original_max_position_embeddings=32768 \ 38 | -------------------------------------------------------------------------------- /docs/api/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainers 2 | ========================= 3 | 4 | Trainers drive the training loop. Introducing new trainer classes in case of new training paradiam is encouraged. 5 | 6 | .. 
autosummary:: 7 | :nosignatures: 8 | 9 | verl.trainer.ppo.ray_trainer.RayPPOTrainer 10 | 11 | 12 | Core APIs 13 | ~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: verl.trainer.ppo.ray_trainer.RayPPOTrainer 16 | 17 | .. automodule:: verl.utils.tokenizer 18 | :members: hf_tokenizer 19 | 20 | .. automodule:: verl.single_controller 21 | :members: Worker, WorkerGroup, ClassWithInitArgs, ResourcePool 22 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | Training utils 2 | ========================= 3 | 4 | Core APIs 5 | ~~~~~~~~~~~~~~~~~ 6 | 7 | .. automodule:: verl.utils.metric 8 | :members: reduce_metrics 9 | -------------------------------------------------------------------------------- /docs/examples/multi_modal_example.rst: -------------------------------------------------------------------------------- 1 | Multi-Modal Example Architecture 2 | ================================= 3 | 4 | Introduction 5 | ------------ 6 | 7 | verl now supports multi-modal training. You can use FSDP and 8 | vLLM/SGLang to start a multi-modal RL task. Megatron support is also 9 | on the way. 10 | 11 | Follow the steps below to quickly start a multi-modal RL task. 12 | 13 | Step 1: Prepare dataset 14 | ----------------------- 15 | 16 | .. code:: bash 17 | 18 | # it will be saved in the $HOME/data/geo3k folder 19 | python examples/data_preprocess/geo3k.py 20 | 21 | Step 2: Download Model 22 | ---------------------- 23 | 24 | .. code:: bash 25 | 26 | # download the model from huggingface 27 | python3 -c "import transformers; transformers.pipeline(model='Qwen/Qwen2.5-VL-7B-Instruct')" 28 | 29 | Step 3: Perform GRPO training with multi-modal model on Geo3K Dataset 30 | --------------------------------------------------------------------- 31 | 32 | .. code:: bash 33 | 34 | # run the task 35 | bash examples/grpo_trainer/run_qwen2_5_vl-7b.sh 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /docs/sglang_multiturn/multiturn.rst: -------------------------------------------------------------------------------- 1 | Multi-turn Rollout Support 2 | ========================== 3 | 4 | Basic Configuration 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | To enable multi-turn rollout, make sure to configure the following fields in your rollout configuration: 8 | 9 | .. code-block:: yaml 10 | 11 | actor_rollout_ref: 12 | rollout: 13 | multi_turn: True 14 | name: "sglang_async" 15 | 16 | This configuration activates the sglang_async engine for multi-turn interaction during rollout. 17 | 18 | Custom Tool Configuration 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | For custom environment interaction tools, you can implement your own tools based on ``verl.tools.base_tool.BaseTool``. Then, specify your tool configurations in a YAML file: 22 |
23 | .. code-block:: yaml 24 | 25 | tools: 26 | - class_name: "" 27 | config: {} 28 | tool_schema: 29 | 30 | You may refer to GSM8KTool_example_configuration_, which is an example of such a tool configuration. Its implementation can be found in gsm8k_tool.py_. 31 | 32 | Finally, set the ``tools_config_file`` in your rollout config: 33 | 34 | .. code-block:: yaml 35 | 36 | actor_rollout_ref: 37 | rollout: 38 | tool_kwargs: 39 | tools_config_file: 40 | 41 | This allows integration of customized tool behaviors during actor rollout steps. 42 | 43 | GSM8K Multi-turn Training Performance 44 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | See the training performance of multi-turn rollout on the GSM8K task HERE_. 47 | 48 | .. _HERE: https://wandb.ai/zhaochenyang20/gsm8k_async_rl/runs/1ro1r7om?nw=nwuserzhaochenyang20 49 | 50 | .. _GSM8KTool_example_configuration: https://github.com/volcengine/verl/blob/main/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml 51 | 52 | .. _gsm8k_tool.py: https://github.com/volcengine/verl/blob/main/verl/tools/gsm8k_tool.py 53 | -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Env as BaseEnv 2 | from .search import SearchEnv 3 | from .reward_rollout_example import RewardRolloutEnv 4 | 5 | __all__ = ['BaseEnv', 'SearchEnv', 'RewardRolloutEnv'] 6 | 7 | TOOL_ENV_REGISTRY = { 8 | 'base': BaseEnv, 9 | 'search': SearchEnv, 10 | 'reward_rollout': RewardRolloutEnv 11 | } -------------------------------------------------------------------------------- /envs/configs/mcp_tools.pydata: -------------------------------------------------------------------------------- 1 | [ 2 | {'mcpServers': { 3 | 'search': { 4 | 'command': 'python3', 5 | 'args': ['envs/tools/search.py'] 6 | } 7 | }} 8 | ] -------------------------------------------------------------------------------- /envs/configs/sse_mcp_tools.pydata: -------------------------------------------------------------------------------- 1 | [ 2 | {'mcpServers': { 3 | 'meituan_search': { 4 | 'url': 'http://xxxx:8080/sse', 5 | } 6 | }} 7 | ] -------------------------------------------------------------------------------- /envs/tool_manager/__init__.py: -------------------------------------------------------------------------------- 1 | from .config_manager import ConfigManager 2 | from .qwen3_manager import QwenManager 3 | from .qwen2_5_manager import Qwen25Manager 4 | 5 | __all__ = ['ConfigManager', 'QwenManager', 'Qwen25Manager'] 6 | 7 | TOOL_MANAGER_REGISTRY = { 8 | 'config': ConfigManager, 9 | 'qwen3': QwenManager, 10 | 'qwen2_5': Qwen25Manager 11 | } -------------------------------------------------------------------------------- /envs/tool_manager/base_manager.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class ToolManager(ABC): 6 | def __init__(self, verl_config) -> None: 7 | self.verl_config = verl_config 8 | self.tool_map = {} 9 | self._build_tools() 10 | 11 | def get_tool(self, name_or_short_name: str): 12 | """Get a tool by its name or short name. 13 | 14 | Args: 15 | name_or_short_name: the tool's name or short name 16 | 17 | Returns: 18 | The matching tool, or None if it is not found 19 | """ 20 | name_or_short_name = str(name_or_short_name) 21 | return self.tool_map.get(name_or_short_name, None) 22 | 23 | @property 24 | @abstractmethod 25 | def all_tools(self): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def
_build_tools(self): 30 | raise NotImplementedError 31 | 32 | @abstractmethod 33 | def execute_actions(self, responses: List[str]): 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /envs/utils/util.py: -------------------------------------------------------------------------------- 1 | # copy from qwen_agent 2 | import json 3 | import json5 4 | from typing import Optional 5 | 6 | 7 | class ToolServiceError(Exception): 8 | def __init__(self, 9 | exception: Optional[Exception] = None, 10 | code: Optional[str] = None, 11 | message: Optional[str] = None, 12 | extra: Optional[dict] = None): 13 | if exception is not None: 14 | super().__init__(exception) 15 | else: 16 | super().__init__(f'\nError code: {code}. Error message: {message}') 17 | self.exception = exception 18 | self.code = code 19 | self.message = message 20 | self.extra = extra 21 | 22 | 23 | class DocParserError(Exception): 24 | def __init__(self, 25 | exception: Optional[Exception] = None, 26 | code: Optional[str] = None, 27 | message: Optional[str] = None, 28 | extra: Optional[dict] = None): 29 | if exception is not None: 30 | super().__init__(exception) 31 | else: 32 | super().__init__(f'\nError code: {code}. Error message: {message}') 33 | self.exception = exception 34 | self.code = code 35 | self.message = message 36 | self.extra = extra 37 | 38 | 39 | def json_loads(text: str) -> dict: 40 | text = text.strip('\n') 41 | if text.startswith('```') and text.endswith('\n```'): 42 | text = '\n'.join(text.split('\n')[1:-1]) 43 | try: 44 | return json.loads(text) 45 | except json.decoder.JSONDecodeError as json_err: 46 | try: 47 | return json5.loads(text) 48 | except ValueError: 49 | raise json_err -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=2 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path\ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=16 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path \ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=2 \ 22 | 
rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=1024 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.actor.entropy_coeff=0 \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=20 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo --config-path=config \ 4 | --config-name='ppo_megatron_trainer.yaml'\ 5 | algorithm.adv_estimator=grpo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=1024 \ 11 | data.filter_overlong_prompts=True \ 12 | data.truncation='error' \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \ 19 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \ 20 | actor_rollout_ref.actor.use_kl_loss=True \ 21 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 22 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 23 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | 
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm_megatron' \ 35 | trainer.n_gpus_per_node=16 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 17 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | algorithm.use_kl_in_reward=False \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=20 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 28 | critic.model.enable_gradient_checkpointing=True \ 29 | critic.ppo_micro_batch_size_per_gpu=32 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=20 \ 40 | trainer.test_freq=1 \ 41 | trainer.total_epochs=15 $@ 42 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=512 \ 8 | data.max_prompt_length=1024 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=google/gemma-2-2b-it \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=False \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=False \ 26 | critic.model.path=google/gemma-2-2b-it \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_example' \ 35 | trainer.experiment_name='gemma2b_function_rm' \ 36 | trainer.n_gpus_per_node=2 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=20 \ 39 | trainer.test_freq=10 \ 40 | trainer.total_epochs=15 $@ 41 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 
17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | 
data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 
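# Note on the positional arguments above: $1 (nproc_per_node) is forwarded to torchrun below,
# and $2 (save_path) becomes trainer.default_local_dir, the local directory where checkpoints are written.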
11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 | data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 | trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Turn Rollout Example (GSM8K) 2 | 3 | This example demonstrates how to perform **multi-turn rollout** using SGLang with a tool-calling capable model (e.g., Qwen2.5-3B) on the GSM8K dataset. 4 | 5 | ## Usage 6 | 7 | ### Step 1: Download GSM8K Dataset 8 | 9 | ```bash 10 | cd examples/data_preprocess 11 | python3 gsm8k_multiturn_w_tool.py 12 | ``` 13 | 14 | This will download and preprocess the GSM8K dataset into ~/data/gsm8k/. 15 | 16 | ### Step 2: Run Multi-Turn Rollout 17 | 18 | If you have 8 GPUs 19 | Use the standard 8-GPU script: 20 | 21 | ```bash 22 | cd your_verl_root_dir 23 | bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh 24 | ``` 25 | 26 | If you have only 4 GPUs 27 | Use the fallback 4-GPU script: 28 | 29 | ```bash 30 | cd your_verl_root_dir 31 | bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh 32 | ``` 33 | 34 | ## Notes 35 | 36 | - The rollout supports multi-turn conversations with tool-calling capabilities. 37 | - Current tools are used for GSM8K answer evaluation. 38 | - Future versions may extend to search and code interpreter tools. 39 | -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | searchpath: 3 | - file://verl/trainer/config 4 | 5 | defaults: 6 | - ppo_trainer 7 | - _self_ 8 | 9 | data: 10 | max_prompt_length: 1024 11 | max_response_length: 1024 12 | train_batch_size: 256 13 | return_raw_chat: True 14 | 15 | actor_rollout_ref: 16 | hybrid_engine: True 17 | rollout: 18 | name: sglang_async 19 | multi_turn: 20 | enable: True 21 | max_turns: 5 22 | # tool_config_path: "./config/tool_config/gsm8k_tool_config.yaml" 23 | -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml: -------------------------------------------------------------------------------- 1 | tools: 2 | - class_name: "verl.tools.gsm8k_tool.Gsm8kTool" 3 | config: {} 4 | tool_schema: 5 | type: "function" 6 | function: 7 | name: "calc_gsm8k_reward" 8 | description: "A tool for calculating the reward of gsm8k. 
(1.0 if parsed answer is correct, 0.0 if parsed answer is incorrect or not correctly parsed)" 9 | parameters: 10 | type: "object" 11 | properties: 12 | answer: 13 | type: "string" 14 | description: "The model's answer to the GSM8K math problem, must be a string of digits" 15 | required: ["answer"] 16 | -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 main_ppo_split.py \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.actor.use_kl_loss=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size_per_gpu=8 \ 27 | critic.model.fsdp_config.param_offload=False \ 28 | critic.model.fsdp_config.optimizer_offload=False \ 29 | algorithm.use_kl_in_reward=False \ 30 | trainer.critic_warmup=0 \ 31 | trainer.logger=['console','wandb'] \ 32 | trainer.project_name='verl_example_gsm8k' \ 33 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 34 | trainer.n_gpus_per_node=8 \ 35 | trainer.nnodes=1 \ 36 | trainer.save_freq=-1 \ 37 | trainer.total_epochs=15 $@ 38 | -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/rlhf/gsm8k/train.parquet 4 | gsm8k_val_path=$HOME/data/rlhf/math/test.parquet 5 | model_path=Qwen/Qwen2-72B-Instruct 6 | 7 | python3 -m verl.trainer.main_ppo \ 8 | algorithm.adv_estimator=grpo \ 9 | data.train_files=$gsm8k_train_path \ 10 | data.val_files=$gsm8k_val_path \ 11 | data.train_batch_size=1024 \ 12 | data.max_prompt_length=512 \ 13 | data.max_response_length=512 \ 14 | data.filter_overlong_prompts=True \ 15 | data.truncation='error' \ 16 | actor_rollout_ref.model.path=$model_path \ 17 | actor_rollout_ref.actor.optim.lr=1e-6 \ 18 | actor_rollout_ref.model.use_remove_padding=True \ 19 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 20 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 21 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 22 | actor_rollout_ref.actor.use_kl_loss=True \ 23 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 24 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 25 | actor_rollout_ref.actor.entropy_coeff=0 \ 26 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 27 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 28 | 
actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 29 | actor_rollout_ref.rollout.tensor_model_parallel_size=16 \ 30 | actor_rollout_ref.rollout.name=vllm \ 31 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 32 | actor_rollout_ref.rollout.n=5 \ 33 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 34 | algorithm.use_kl_in_reward=False \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console','wandb'] \ 37 | trainer.project_name='verl_grpo_example_gsm8k' \ 38 | trainer.experiment_name='Qwen2_72B_Instruct' \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=4 \ 41 | trainer.save_freq=-1 \ 42 | trainer.test_freq=5 \ 43 | trainer.total_epochs=1 $@ -------------------------------------------------------------------------------- /generator/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_generator import BaseGenerator, _GENERATORS 2 | from .api_generator import APIGenerator 3 | 4 | 5 | def get_generator(name: str) -> BaseGenerator: 6 | """ 7 | Return constructor for specified generator 8 | """ 9 | name = "".join(name.lower().split("_")) 10 | if name in _GENERATORS: 11 | return _GENERATORS[name] 12 | else: 13 | raise Exception("Error: Trying to access a generator that has not been registered") 14 | -------------------------------------------------------------------------------- /generator/base_generator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Dict, Any, Tuple, Optional 3 | from abc import abstractmethod 4 | from easydict import EasyDict 5 | 6 | 7 | # specifies a dictionary of engines 8 | _GENERATORS: Dict[str, Any] = {} # registry 9 | 10 | 11 | def register_generator(name): 12 | """Decorator used to register a generator 13 | Args: 14 | name: Name of the engine type to register 15 | """ 16 | 17 | def register_class(cls, name): 18 | _GENERATORS[name] = cls 19 | setattr(sys.modules[__name__], name, cls) 20 | return cls 21 | 22 | if isinstance(name, str): 23 | name = name.lower() 24 | return lambda c: register_class(c, name) 25 | 26 | cls = name 27 | name = cls.__name__ 28 | register_class(cls, name.lower()) 29 | 30 | return cls 31 | 32 | 33 | @register_generator 34 | class BaseGenerator: 35 | def __init__(self, config: EasyDict): 36 | self.config = config 37 | 38 | @abstractmethod 39 | def generate(self, *args, **kwargs): 40 | pass 41 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | pip3 install accelerate bitsandbytes datasets deepspeed==0.16.4 einops flash-attn==2.7.0.post2 isort jsonlines loralib optimum packaging peft pynvml>=12.0.0 ray[default]==2.42.0 tensorboard torch torchmetrics tqdm transformers==4.48.3 transformers_stream_generator wandb wheel 2 | pip3 install vllm==0.8.5 3 | pip3 install "qwen-agent[code_interpreter]" 4 | pip3 install llama_index bs4 pymilvus infinity_client codetiming tensordict==0.6 omegaconf torchdata==0.10.0 hydra-core easydict dill python-multipart 5 | pip3 install -e . 
--no-deps 6 | pip3 install faiss-gpu-cu12 -------------------------------------------------------------------------------- /rag_server/README.md: -------------------------------------------------------------------------------- 1 | ## Environment configuration 2 | ```bash 3 | conda create -n searchr1 python=3.10 4 | conda activate searchr1 5 | pip3 install torch==2.6.0 torchaudio==2.6.0 torchvision==0.21.0 6 | pip3 install vllm==0.8.5 7 | 8 | # flash attention 2 9 | pip3 install flash-attn --no-build-isolation 10 | pip3 install faiss-gpu-cu12 uvicorn fastapi mcp # faiss-gpu==1.8 also works here 11 | ``` 12 | ## Quick start 13 | 14 | (1) Download the dataset 15 | ```bash 16 | save_path=/your/path/to/save 17 | python rag_server/download.py --save_path $save_path 18 | cat $save_path/part_* > $save_path/e5_Flat.index 19 | gzip -d $save_path/wiki-18.jsonl.gz 20 | ``` 21 | 22 | (2) Process the NQ dataset. 23 | ```bash 24 | python scripts/nq_search.py 25 | ``` 26 | -------------------------------------------------------------------------------- /rag_server/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from huggingface_hub import hf_hub_download 3 | 4 | parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.") 5 | parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID") 6 | parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files") 7 | 8 | args = parser.parse_args() 9 | 10 | repo_id = "PeterJinGo/wiki-18-e5-index" 11 | for file in ["part_aa", "part_ab"]: 12 | hf_hub_download( 13 | repo_id=repo_id, 14 | filename=file, # e.g., "e5_Flat.index" 15 | repo_type="dataset", 16 | local_dir=args.save_path, 17 | ) 18 | 19 | repo_id = "PeterJinGo/wiki-18-corpus" 20 | hf_hub_download( 21 | repo_id=repo_id, 22 | filename="wiki-18.jsonl.gz", 23 | repo_type="dataset", 24 | local_dir=args.save_path, 25 | ) -------------------------------------------------------------------------------- /rag_server/launch.sh: -------------------------------------------------------------------------------- 1 | #tmux new -s rag_server 2 | #conda activate searchr1 3 | #bash retrieval_launch.sh 4 | #tmux detach 5 | #bash train_ppo.sh 6 | 7 | nvcc --version 8 | 9 | file_path=/your/path/to/PeterGriffinJin/Search-R1/data/rag_data 10 | index_file=$file_path/e5_Flat.index 11 | corpus_file=$file_path/wiki-18.jsonl 12 | retriever=/your/path/to/PeterGriffinJin/Search-R1/huggingface.co/intfloat/e5-base-v2 13 | 14 | python rag_server/retrieval_server.py --index_path $index_file \ 15 | --corpus_path $corpus_file \ 16 | --topk 3 \ 17 | --retriever_model $retriever & 18 | sleep 1200 19 | 20 | # Send a follow-up retrieval request 21 | curl -X POST http://127.0.0.1:5003/retrieve \ 22 | -H "Content-Type: application/json" \ 23 | -d '{ 24 | "queries": ["What is Python?", "Tell me about neural networks."], 25 | "topk": 3, 26 | "return_scores": true 27 | }' -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | searchpath: 3 | - file://verl/trainer/config 4 | 5 | defaults: 6 | - ppo_trainer 7 | - _self_ 8 | 9 | data: 10 | gen_batch_size: ${data.train_batch_size} 11 | 12 | reward_model: 13 | reward_manager: dapo 14 | overlong_buffer: 15 | enable: False # We try to avoid forgetting to set enable 16 | len: 0 17 | penalty_factor: 0.0 18 | log: 
False 19 | 20 | algorithm: 21 | filter_groups: 22 | enable: False # We try to avoid forgetting to set enable 23 | metric: null # acc / score / seq_reward / seq_final_reward / ... 24 | max_num_gen_batches: 0 # Non-positive values mean no upper limit 25 | 26 | trainer: 27 | project_name: verl-dapo 28 | -------------------------------------------------------------------------------- /recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -uxo pipefail 3 | 4 | export VERL_HOME=${VERL_HOME:-"${HOME}/verl"} 5 | export TRAIN_FILE=${TRAIN_FILE:-"${VERL_HOME}/data/dapo-math-17k.parquet"} 6 | export TEST_FILE=${TEST_FILE:-"${VERL_HOME}/data/aime-2024.parquet"} 7 | export OVERWRITE=${OVERWRITE:-0} 8 | 9 | mkdir -p "${VERL_HOME}/data" 10 | 11 | if [ ! -f "${TRAIN_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then 12 | wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k/resolve/main/data/dapo-math-17k.parquet?download=true" 13 | fi 14 | 15 | if [ ! -f "${TEST_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then 16 | wget -O "${TEST_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/AIME-2024/resolve/main/data/aime-2024.parquet?download=true" 17 | fi 18 | -------------------------------------------------------------------------------- /recipe/drgrpo/README.md: -------------------------------------------------------------------------------- 1 | # Dr. GRPO Open-Source Implementation 2 | 3 | 4 | https://github.com/sail-sg/understand-r1-zero 5 | 6 | 7 | This paper suggests a way to calculate the unbiased policy gradient. 8 | 9 | 10 | ## Configuration 11 | ```yaml 12 | actor_rollout_ref: 13 | actor: 14 | loss_agg_mode: "seq-mean-token-sum-norm" # turn off seq-dim averaging 15 | use_kl_loss: False 16 | algorithm: 17 | norm_adv_by_std_in_grpo: False # turn off standard deviation norm 18 | ``` 19 | 20 | , with all other parameters set same as GRPO. 21 | -------------------------------------------------------------------------------- /recipe/prime/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek R1 Reproduction 2 | 3 | This recipe is under development, if you are interested, checkout the TODO list and join this project! 
https://github.com/volcengine/verl/issues/708 4 | 5 | ## Reproducing Evaluation 6 | 7 | Eval Results of DS-R1-Distill-Qwen2.5-1.5B (k=8) 8 | 9 | Dataset | Test Results | Reported 10 | -- | -- | -- 11 | GPQA Diamond | 35.3 | 33.8 12 | LiveCodeBench | 16.9 | 16.9 13 | AIME 2024 | 30.4 | 28.9 14 | CNMO 2024 (en) | 45.1 | - 15 | CNMO 2024 (zh) | 41.0 | - 16 | 17 | --- 18 | 19 | Eval Results (DS-R1) 20 | 21 | Dataset | Test Results (k=1) | Test Results (k=4) | Reported 22 | -- | -- | -- | -- 23 | GPQA Diamond | 67.7 | 69.6 | 71.5 24 | LiveCodeBench | 64.7 | 63.1 | 65.9 25 | AIME 2024 | 86.7 | 79.2 | 79.8 26 | CNMO 2024 | 75.0 | 78.5 | 78.8 27 | -------------------------------------------------------------------------------- /recipe/r1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_init: 13 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. -------------------------------------------------------------------------------- /recipe/r1/reward_score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
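# reward_func (below) dispatches each sample to a task-specific scorer based on its
# data_source tag: the AIME/CNMO math sets go to recipe.r1.tasks.math, GPQA to
# recipe.r1.tasks.gpqa, and LiveCodeBench to recipe.r1.tasks.livecodebench; any other
# tag raises NotImplementedError. A minimal, illustrative call (the answer strings are made up):
#   score = reward_func("Idavidrein/gpqa", "... Answer: C", "C")  # -> 1.0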
14 | 15 | 16 | def reward_func(data_source, solution_str, ground_truth, extra_info=None): 17 | if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]: 18 | from recipe.r1.tasks import math 19 | 20 | return math.compute_score(solution_str, ground_truth) 21 | elif data_source == "Idavidrein/gpqa": 22 | from recipe.r1.tasks import gpqa 23 | 24 | return gpqa.compute_score(solution_str, ground_truth) 25 | elif data_source in ["livecodebench/code_generation_lite", "livecodebench/code_generation"]: 26 | from recipe.r1.tasks import livecodebench 27 | 28 | return livecodebench.compute_score(solution_str, ground_truth) 29 | else: 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=Qwen/DeepSeek-R1-Distill-Qwen-1.5B 2 | DATA_PATH=/workspace/datasets/r1_bench 3 | 4 | # Eval Data Process 5 | python3 -m recipe.r1.data_process \ 6 | --local_dir $DATA_PATH \ 7 | --tasks all 8 | 9 | # Generation 10 | python3 -m verl.trainer.main_generation \ 11 | trainer.nnodes=1 \ 12 | trainer.n_gpus_per_node=8 \ 13 | data.path=$DATA_PATH/test.parquet \ 14 | data.prompt_key=prompt \ 15 | data.batch_size=1024 \ 16 | data.n_samples=8 \ 17 | data.output_path=$DATA_PATH/test-output-8.parquet \ 18 | model.path=$MODEL_PATH \ 19 | rollout.temperature=0.6 \ 20 | rollout.top_p=0.95 \ 21 | rollout.prompt_length=1024 \ 22 | rollout.response_length=32768 \ 23 | rollout.tensor_model_parallel_size=1 \ 24 | rollout.gpu_memory_utilization=0.9 \ 25 | rollout.max_num_batched_tokens=65536 26 | 27 | # Evaluation 28 | python3 -m recipe.r1.main_eval \ 29 | data.path=$DATA_PATH/test-output-8.parquet \ 30 | data.prompt_key=prompt \ 31 | data.response_key=responses \ 32 | custom_reward_function.path=recipe/r1/reward_score.py \ 33 | custom_reward_function.name=reward_func 34 | -------------------------------------------------------------------------------- /recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | # Extraction Template from https://github.com/openai/simple-evals/blob/90e3e821cabba2aeb6be651dcb662b253df04225/common.py#L25 18 | ANSWER_PATTERN_MULTICHOICE = r"(?i)Answer[ \t]*:[ \t]*\$?([A-D])\$?" 19 | 20 | 21 | def compute_score(solution_str, ground_truth) -> float: 22 | match = re.search(ANSWER_PATTERN_MULTICHOICE, solution_str) 23 | extracted_answer = match.group(1) if match else None 24 | score = 1.0 if extracted_answer == ground_truth else 0.0 25 | return score 26 | -------------------------------------------------------------------------------- /recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import contextlib 15 | 16 | try: 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str) -> bool: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | with contextlib.suppress(Exception): 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | 35 | return ret_score 36 | -------------------------------------------------------------------------------- /recipe/sppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- 1 | # the sppo config will override default ppo_trainer.yaml 2 | 3 | hydra: 4 | searchpath: 5 | - file://verl/trainer/config 6 | 7 | defaults: 8 | - ppo_trainer 9 | - _self_ 10 | 11 | actor_rollout_ref: 12 | actor: 13 | sppo_eta: 1.0 14 | optim: 15 | lr_warmup_steps: 15 16 | rollout: 17 | name: sglang 18 | tensor_model_parallel_size: 2 19 | gpu_memory_utilization: 0.5 20 | val_kwargs: 21 | n: 2 # 2 will trigger validation, 1 will bypass 22 | 23 | algorithm: 24 | adv_estimator: null 25 | sppo_eta: 1.0 26 | 27 | trainer: 28 | log_val_generations: 0 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | peft 12 | pyarrow>=19.0.0 13 | pybind11 14 | pylatexenc 15 | pre-commit 16 | ray[default] 17 | tensordict<=0.6.2 18 | torchdata 19 | transformers 20 | # vllm==0.8.4 21 | wandb 22 | packaging>=20.0 23 | uvicorn 24 | fastapi 25 | qwen-agent 26 | mcp 27 | -------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy 9 | pandas 10 | peft 11 | pyarrow>=19.0.0 12 | pybind11 13 | pylatexenc 14 | ray[default]>=2.10 15 | tensordict<=0.6.2 16 | torchdata 17 | torchvision 18 | transformers 19 | wandb 20 | sglang[all]==0.4.4.post4 21 | torch-memory-saver>=0.0.5 22 | mcp 23 | qwen_agent 24 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples recipe 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
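# Script overview: enumerate every prompt of the DigitCompletion toy task, split them
# 80/20 into train/test, wrap each prompt as a single user chat turn, and write
# train.parquet / test.parquet next to this file for the arithmetic-sequence e2e test.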
14 | 15 | import os 16 | 17 | from torch.utils import data 18 | 19 | from tests.e2e.envs.digit_completion import DigitCompletion 20 | 21 | if __name__ == "__main__": 22 | simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9) 23 | all_prompts = simple_task.get_all_prompts() 24 | 25 | # 21 * 6 * 4 26 | train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2]) 27 | train_data = list(train_data) 28 | test_data = list(test_data) 29 | 30 | train_data = [[{"role": "user", "content": str(item)}] for item in train_data] 31 | test_data = [[{"role": "user", "content": str(item)}] for item in test_data] 32 | 33 | print(f"Size of train: {len(train_data)}, size of test: {len(test_data)}") 34 | 35 | train_data = {"prompt": train_data} 36 | test_data = {"prompt": test_data} 37 | 38 | model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__))) 39 | 40 | import pandas as pd 41 | 42 | train_data_frame = pd.DataFrame(train_data) 43 | test_data_frame = pd.DataFrame(test_data) 44 | 45 | train_data_frame.to_parquet(os.path.join(model_folder, "train.parquet")) 46 | test_data_frame.to_parquet(os.path.join(model_folder, "test.parquet")) 47 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate 
user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as below: 4 | 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be modulo with max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is defined in tests/e2e/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | An example of running the task is provided in `tests/e2e/run_ray_trainer.sh`. 26 | 27 | ```bash 28 | bash tests/e2e/run_ray_trainer.sh 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | 18 | def check_congratulations_in_file(output_file): 19 | with open(output_file) as f: 20 | output = f.read() 21 | 22 | success_message = "Congratulations!!! You have called my_reward_function successfully!!!" 23 | assert success_message in output, f"Success message of my_reward_function not found in {output_file}" 24 | print("Check passes") 25 | 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--output_file", required=True, type=str) 30 | 31 | args = parser.parse_args() 32 | 33 | check_congratulations_in_file(args.output_file) 34 | -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ["DigitCompletion"] 18 | -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from transformers import AutoTokenizer, LlamaConfig 16 | 17 | from .task import DigitCompletion, generate_ground_truth_response 18 | from .tokenizer import CharTokenizer 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ["DigitCompletion", "generate_ground_truth_response", "CharTokenizer"] 23 | -------------------------------------------------------------------------------- /tests/e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Tested with 1 & 4 GPUs 3 | set -xeuo pipefail 4 | 5 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} 6 | 7 | NGPUS_PER_NODE=${NGPUS_PER_NODE:-4} 8 | OUTPUT_PATH=${OUTPUT_PATH:-$HOME/data/gen/qwen_05_gen_test.parquet} 9 | GEN_TP=${GEN_TP:-2} # Default tensor parallel size to 2 10 | 11 | python3 -m verl.trainer.main_generation \ 12 | trainer.nnodes=1 \ 13 | trainer.n_gpus_per_node="${NGPUS_PER_NODE}" \ 14 | data.path="${HOME}/data/gsm8k/test.parquet" \ 15 | data.prompt_key=prompt \ 16 | data.n_samples=1 \ 17 | data.output_path="${OUTPUT_PATH}" \ 18 | model.path="${MODEL_ID}" \ 19 | +model.trust_remote_code=True \ 20 | rollout.temperature=1.0 \ 21 | rollout.top_k=50 \ 22 | rollout.top_p=0.7 \ 23 | rollout.prompt_length=2048 \ 24 | rollout.response_length=1024 \ 25 | rollout.tensor_model_parallel_size="${GEN_TP}" \ 26 | rollout.gpu_memory_utilization=0.8 27 | -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct 6 | 7 | python3 -m verl.trainer.main_ppo --config-path=config \ 8 | --config-name='ppo_megatron_trainer.yaml'\ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.train_batch_size=1024 \ 12 | data.max_prompt_length=512 \ 13 | data.max_response_length=512 \ 14 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 15 | actor_rollout_ref.actor.optim.lr=2e-6 \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 19 | 
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \ 25 | critic.optim.lr=2e-5 \ 26 | critic.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.megatron.tensor_model_parallel_size=2 \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console'] \ 33 | trainer.project_name='verl_megatron_gsm8k_examples' \ 34 | trainer.experiment_name='deepseek_llm_1b3_function_rm' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=1 \ 39 | trainer.total_epochs=15 \ 40 | trainer.total_training_steps=3 $@ 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=1024 \ 12 | data.filter_overlong_prompts=True \ 13 | data.truncation='error' \ 14 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 15 | actor_rollout_ref.actor.optim.lr=1e-6 \ 16 | actor_rollout_ref.model.use_remove_padding=True \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.actor.use_kl_loss=True \ 20 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 21 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=5 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_grpo_example_gsm8k' \ 36 | trainer.experiment_name='qwen2_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 | trainer.total_epochs=15 \ 42 | trainer.total_training_steps=2 $@ -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | ENGINE=${1:-vllm} 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | 
data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=$ENGINE \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=True \ 26 | critic.model.path=Qwen/Qwen2.5-0.5B \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_example_gsm8k' \ 35 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=1 \ 39 | trainer.default_local_dir=$HOME/$ENGINE/ckpt/ \ 40 | trainer.total_training_steps=1 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=grpo \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=False \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=False \ 26 | critic.model.path=Qwen/Qwen2.5-0.5B \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | +trainer.val_before_train=False \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.total_training_steps=1 $@ 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=remax \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: 
verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B 6 | 7 | export VLLM_ATTENTION_BACKEND=XFORMERS 8 | 9 | python3 -m verl.trainer.main_ppo --config-path=config \ 10 | --config-name='ppo_megatron_trainer.yaml'\ 11 | data.train_files=$HOME/data/gsm8k/train.parquet \ 12 | data.val_files=$HOME/data/gsm8k/test.parquet \ 13 | data.train_batch_size=1024 \ 14 | data.max_prompt_length=512 \ 15 | data.max_response_length=512 \ 16 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 17 | actor_rollout_ref.actor.optim.lr=2e-6 \ 18 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 19 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 20 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 25 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 26 | actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \ 27 | critic.optim.lr=2e-5 \ 28 | critic.model.path=Qwen/Qwen2.5-0.5B \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size_per_gpu=4 \ 31 | critic.megatron.tensor_model_parallel_size=2 \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_megatron_gsm8k_examples' \ 36 | trainer.experiment_name='qwen2_5_0b5_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=1 \ 41 | trainer.total_epochs=15 \ 42 | trainer.total_training_steps=3 $@ 43 | -------------------------------------------------------------------------------- /tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 5 | --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$HOME/data/r1/test.parquet \ 11 | data.prompt_key=prompt \ 12 | data.batch_size=1024 \ 13 | data.n_samples=1 \ 14 | data.output_path=$HOME/data/r1/test-output-k1.parquet \ 15 | model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 16 | rollout.temperature=0.6 \ 17 | rollout.top_p=0.95 \ 18 | rollout.prompt_length=1024 \ 19 | rollout.response_length=32768 \ 20 | rollout.tensor_model_parallel_size=1 \ 21 | rollout.gpu_memory_utilization=0.95 \ 22 | rollout.max_num_batched_tokens=65536 \ 23 | rollout.enforce_eager=False \ 24 | rollout.free_cache_engine=False 25 | 26 | python3 -m recipe.r1.main_eval \ 27 | data.path=$HOME/data/r1/test-output-k1.parquet \ 28 | data.prompt_key=prompt \ 29 | data.response_key=responses \ 30 | custom_reward_function.path=recipe/r1/reward_score.py \ 31 | custom_reward_function.name=reward_func -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | 
algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.max_prompt_length=16 \ 16 | data.max_response_length=32 \ 17 | data.return_raw_input_ids=True \ 18 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 19 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 20 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=128 \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.actor.optim.lr=1e-4 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 25 | actor_rollout_ref.rollout.name=hf \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 27 | critic.ppo_micro_batch_size_per_gpu=128 \ 28 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 29 | critic.optim.lr=1e-3 \ 30 | algorithm.use_kl_in_reward=False \ 31 | trainer.total_epochs=200 \ 32 | trainer.experiment_name=arithmetic_sequences \ 33 | trainer.logger=['console'] \ 34 | trainer.n_gpus_per_node=1 \ 35 | trainer.test_freq=1 \ 36 | trainer.save_freq=110 | tee $OUTPUT_FILE; 37 | 38 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE 39 | rm -rf $OUTPUT_FILE 40 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_fire_sampling.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.val_batch_size=200 \ 16 | data.max_prompt_length=16 \ 17 | data.max_response_length=32 \ 18 | data.return_raw_input_ids=True \ 19 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 20 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=128 \ 22 | actor_rollout_ref.actor.entropy_coeff=0 \ 23 | actor_rollout_ref.actor.optim.lr=1e-4 \ 24 | actor_rollout_ref.actor.use_kl_loss=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 26 | actor_rollout_ref.rollout.name=hf \ 27 | actor_rollout_ref.rollout.use_fire_sampling=True \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 29 | critic.ppo_micro_batch_size_per_gpu=128 \ 30 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 31 | critic.optim.lr=1e-3 \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.total_epochs=200 \ 34 | trainer.experiment_name=arithmetic_sequences \ 35 | trainer.logger=['console'] \ 36 | trainer.n_gpus_per_node=1 \ 37 | trainer.test_freq=1 \ 38 | trainer.save_freq=110 | tee $OUTPUT_FILE; 39 | 40 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE --target 0.19 41 | rm -rf $OUTPUT_FILE 42 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 
6 | 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 8 | algorithm.adv_estimator=gae \ 9 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 10 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 11 | actor_rollout_ref.actor.use_kl_loss=False \ 12 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 13 | actor_rollout_ref.rollout.name=vllm \ 14 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 15 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 16 | critic.model.path=Qwen/Qwen2.5-0.5B \ 17 | critic.model.use_remove_padding=True \ 18 | algorithm.use_kl_in_reward=False \ 19 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /tests/e2e/run_sppo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | # in e2e_sppo.yml, we set NUM_GPUS=8 L20 5 | 6 | NUM_GPUS=${NUM_GPUS:-8} 7 | 8 | gsm8k_train_path=./data/math/train.parquet 9 | gsm8k_test_path=./data/math/test.parquet 10 | train_files="['$gsm8k_train_path']" 11 | test_files="['$gsm8k_test_path']" 12 | 13 | exp_name="Qwen2.5-0.5B-Instruct-sppo-minimal" 14 | 15 | python3 -m recipe.sppo.main_sppo \ 16 | data.train_files="$train_files" \ 17 | data.val_files="$test_files" \ 18 | data.train_batch_size=1024 \ 19 | data.max_prompt_length=1024 \ 20 | data.max_response_length=512 \ 21 | data.filter_overlong_prompts=True \ 22 | data.truncation='error' \ 23 | data.return_raw_chat=True \ 24 | actor_rollout_ref.model.path="./models/Qwen2.5-0.5B-Instruct" \ 25 | actor_rollout_ref.actor.optim.lr=1e-6 \ 26 | actor_rollout_ref.model.use_remove_padding=True \ 27 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 28 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 29 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 30 | actor_rollout_ref.actor.use_kl_loss=False \ 31 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 32 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 33 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 34 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 35 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 36 | actor_rollout_ref.rollout.name=sglang \ 37 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 38 | algorithm.use_kl_in_reward=False \ 39 | trainer.critic_warmup=0 \ 40 | trainer.logger=['console'] \ 41 | trainer.val_before_train=True \ 42 | trainer.n_gpus_per_node=8 \ 43 | trainer.nnodes=1 \ 44 | trainer.save_freq=-1 \ 45 | trainer.total_epochs=2 $@ -------------------------------------------------------------------------------- /tests/e2e/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xeuo pipefail 3 | 4 | # Get the configuration name and engine name from arguments 5 | CONFIG_NAME="$1" 6 | ENGINE="${2:-vllm}" 7 | 8 | # Download model if needed 9 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir "$HOME/models/Qwen/Qwen2.5-0.5B" 10 | 11 | # Run the training with the specified configuration 12 | python3 -m verl.trainer.main_ppo \ 13 | --config-name "$CONFIG_NAME" "$@" -------------------------------------------------------------------------------- /tests/e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | ENTRYPOINT=${ENTRYPOINT:-"-m 
verl.trainer.fsdp_sft_trainer"} 5 | 6 | NUM_GPUS=${NUM_GPUS:-8} 7 | 8 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} 9 | MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}} 10 | huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}" 11 | 12 | TRAIN_FILES=${TRAIN_FILES:-$HOME/data/gsm8k/train.parquet} 13 | VAL_FILES=${VAL_FILES:-$HOME/data/gsm8k/test.parquet} 14 | 15 | SP_SIZE=${SP_SIZE:-1} 16 | LIGER=${LIGER:-False} 17 | MULTITURN=${MULTITURN:-False} 18 | LORA_RANK=${LORA_RANK:-0} 19 | RM_PAD=${RM_PAD:-True} 20 | 21 | micro_bsz=2 22 | NUM_GPUS=8 23 | 24 | project_name="verl-test" 25 | exp_name="$(basename "${MODEL_ID,,}")-sft-minimal" 26 | ckpts_home=${ckpts_home:-$HOME/${project_name}/${exp_name}} 27 | 28 | mkdir -p "${ckpts_home}" 29 | 30 | torchrun --standalone --nnodes=1 --nproc_per_node=${NUM_GPUS} ${ENTRYPOINT} \ 31 | data.train_files="${TRAIN_FILES}" \ 32 | data.val_files="${VAL_FILES}" \ 33 | data.prompt_key=extra_info \ 34 | data.response_key=extra_info \ 35 | data.prompt_dict_keys=['question'] \ 36 | data.response_dict_keys=['answer'] \ 37 | data.multiturn.enable="${MULTITURN}" \ 38 | data.multiturn.messages_key=messages \ 39 | optim.lr=1e-4 \ 40 | data.micro_batch_size_per_gpu=${micro_bsz} \ 41 | model.partial_pretrain="${MODEL_PATH}" \ 42 | model.lora_rank="${LORA_RANK}" \ 43 | model.lora_alpha=16 \ 44 | model.target_modules=all-linear \ 45 | model.use_liger="${LIGER}" \ 46 | ulysses_sequence_parallel_size="${SP_SIZE}" \ 47 | use_remove_padding="${RM_PAD}" \ 48 | trainer.default_local_dir="${ckpts_home}" \ 49 | trainer.project_name="${project_name}" \ 50 | trainer.experiment_name="${exp_name}" \ 51 | trainer.total_training_steps=1 \ 52 | trainer.logger=['console'] \ 53 | trainer.default_hdfs_dir=null $@ 54 | 55 | rm -rf "${ckpts_home:?}/*" -------------------------------------------------------------------------------- /tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | infer_tp=${3:-2} # Default tensor parallel size to 2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | python3 -m verl.trainer.main_generation \ 17 | trainer.nnodes=1 \ 18 | trainer.n_gpus_per_node=$nproc_per_node \ 19 | data.path=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=prompt \ 21 | data.n_samples=1 \ 22 | data.output_path=$save_path \ 23 | model.path=Qwen/Qwen2.5-0.5B-Instruct \ 24 | +model.trust_remote_code=True \ 25 | rollout.temperature=1.0 \ 26 | rollout.top_k=50 \ 27 | rollout.top_p=0.7 \ 28 | rollout.prompt_length=2048 \ 29 | rollout.response_length=1024 \ 30 | rollout.tensor_model_parallel_size=$infer_tp \ 31 | rollout.gpu_memory_utilization=0.8 32 | -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_flash_attn_cross_entropy(): 17 | import torch 18 | from flash_attn.ops.triton.cross_entropy import cross_entropy_loss 19 | from torch import nn 20 | 21 | from verl.utils.debug import log_gpu_memory_usage 22 | from verl.utils.torch_functional import logprobs_from_logits_naive 23 | 24 | log_gpu_memory_usage("At start") 25 | 26 | hidden_states = torch.randn(size=(2048, 5120), device="cuda", requires_grad=True, dtype=torch.bfloat16) 27 | 28 | linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device="cuda", dtype=torch.bfloat16) 29 | 30 | logits = linear(hidden_states) 31 | 32 | # logits = logits.float() 33 | labels = torch.randint(low=0, high=155136, size=(2048,), device="cuda") 34 | 35 | log_gpu_memory_usage("before computation") 36 | # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True) 37 | output = -cross_entropy_loss(logits, labels)[0] 38 | # output = logprobs_from_logits(logits, labels) 39 | log_gpu_memory_usage("After forward") 40 | output.sum().backward() 41 | log_gpu_memory_usage("After backward") 42 | 43 | groundtruth = logprobs_from_logits_naive(logits.float(), labels) 44 | 45 | torch.testing.assert_close(output, groundtruth) 46 | -------------------------------------------------------------------------------- /tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs." 
42 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import time 16 | import os 17 | import subprocess 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time") 38 | time.sleep(wait_time * 1.5) 39 | print(time.time(), f"start checking") 40 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 41 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 42 | print(f"test passed") 43 | 44 | 45 | if __name__ == "__main__": 46 | test() 47 | -------------------------------------------------------------------------------- /tests/ray/test_rvdz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class TestWorker: 20 | 21 | def __init__(self, rank, world_size, group_name): 22 | self.rank = rank 23 | self.world_size = world_size 24 | self.group_name = group_name 25 | self.communicator = None 26 | 27 | def init(self): 28 | from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray 29 | self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name) 30 | 31 | def test(self): 32 | if self.communicator is None: 33 | return None 34 | return self.communicator.rank_id() 35 | 36 | 37 | def test_rvdz(): 38 | ray.init() 39 | 40 | group_name = "test_group" 41 | world_size = 2 42 | 43 | workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)] 44 | 45 | ray.get([worker.init.remote() for worker in workers]) 46 | 47 | ranks = ray.get([worker.test.remote() for worker in workers]) 48 | 49 | assert ranks == [0, 1], f"expecting [0, 1], got {ranks}" 50 | 51 | ray.shutdown() 52 | -------------------------------------------------------------------------------- /tests/ray_cpu/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import subprocess 17 | import time 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time * 1.5} to let signal returned to process but still not exceed process wait time", 38 | ) 39 | time.sleep(wait_time * 1.5) 40 | print(time.time(), "start checking") 41 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 42 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 43 | print("test passed") 44 | 45 | 46 | if __name__ == "__main__": 47 | test() 48 | -------------------------------------------------------------------------------- /tests/ray_cpu/test_ray_local_envs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | e2e test verl.single_controller.ray 16 | """ 17 | 18 | import os 19 | 20 | import ray 21 | 22 | from verl.single_controller.base.worker import Worker 23 | from verl.single_controller.ray.base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup 24 | 25 | 26 | @ray.remote 27 | class TestActor(Worker): 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | def getenv(self, key): 32 | val = os.getenv(key, f"{key} not set") 33 | return val 34 | 35 | 36 | def test_basics(): 37 | ray.init(num_cpus=100) 38 | 39 | # create 4 workers, each hold a GPU 40 | resource_pool = RayResourcePool([4], use_gpu=False) 41 | class_with_args = RayClassWithInitArgs(cls=TestActor) 42 | 43 | worker_group = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=class_with_args, name_prefix="worker_group_basic") 44 | 45 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE") 46 | assert output == ["4", "4", "4", "4"] 47 | 48 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK") 49 | assert set(output) == set(["0", "1", "2", "3"]) 50 | 51 | ray.shutdown() 52 | 53 | 54 | if __name__ == "__main__": 55 | test_basics() 56 | -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray_gpu/test_rvdz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class TestWorker: 20 | def __init__(self, rank, world_size, group_name): 21 | self.rank = rank 22 | self.world_size = world_size 23 | self.group_name = group_name 24 | self.communicator = None 25 | 26 | def init(self): 27 | from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray 28 | 29 | self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name) 30 | 31 | def test(self): 32 | if self.communicator is None: 33 | return None 34 | return self.communicator.rank_id() 35 | 36 | 37 | def test_rvdz(): 38 | ray.init() 39 | 40 | group_name = "test_group" 41 | world_size = 2 42 | 43 | workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)] 44 | 45 | ray.get([worker.init.remote() for worker in workers]) 46 | 47 | ranks = ray.get([worker.test.remote() for worker in workers]) 48 | 49 | assert ranks == [0, 1], f"expecting [0, 1], got {ranks}" 50 | 51 | ray.shutdown() 52 | -------------------------------------------------------------------------------- /tests/rl_factory/envs/test_tool_use.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | from envs.base import Env 3 | 4 | 5 | def test(): 6 | config = type('Config', (), { 7 | 'config_path': 'envs/configs/calculator.json', 8 | 'step_token': '\n' 9 | }) 10 | env = Env(config) 11 | tokenizer = AutoTokenizer.from_pretrained('/your/path/to/Qwen/Qwen2.5-7B-Instruct') 12 | 13 | response_action = """ 14 | Hello! 15 | 16 | 17 | 18 | {"expression": "1+1"} 19 | 20 | 21 | 22 | calculator 23 | 24 | {"expressions": "1+2"} 25 | 26 | 27 | 28 | """ 29 | response_answer = f""" 30 | Hello! 31 | 32 | 2 33 | 34 | """ 35 | env.step([response_action, response_answer], tokenizer) 36 | 37 | if __name__ == '__main__': 38 | test() 39 | -------------------------------------------------------------------------------- /tests/rl_factory/rewarder/test_parallel.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 2 | from multiprocessing import Pool 3 | from generator import get_generator 4 | from verl.utils.vllm_request import vllm_generate 5 | 6 | 7 | def test_parallel_rewarder(): 8 | questions = [ 9 | "Python中如何实现多线程编程?", 10 | "解释一下Python的GIL(全局解释器锁)及其影响", 11 | "Python中的装饰器是什么?请举例说明", 12 | "如何用Python处理JSON数据?", 13 | "Python中列表(list)和元组(tuple)有什么区别?", 14 | "解释Python的生成器(generator)和它们的优势", 15 | "Python中如何处理异常?try-except块如何使用?", 16 | "Python的虚拟环境(virtualenv)有什么作用?如何创建和使用?" 
17 | ] 18 | 19 | generator = get_generator('api')( 20 | config=EasyDict({ 21 | 'api_method': 'local', 22 | 'port': 9000 23 | })) 24 | 25 | print('Start') 26 | with Pool(processes=8) as pool: 27 | results = [] 28 | for question in questions: 29 | result = pool.apply_async( 30 | vllm_generate, args=('http://0.0.0.0:8080', question, '/your/path/to/Qwen/QwQ-32B') 31 | ) 32 | results.append(result) 33 | 34 | datasets_processed = [result.get() for result in results] 35 | 36 | 37 | if __name__ == '__main__': 38 | test_parallel_rewarder() 39 | -------------------------------------------------------------------------------- /tests/rl_factory/test_qwen3_manager.py: -------------------------------------------------------------------------------- 1 | from envs.tool_manager.qwen3_manager import QwenManager 2 | 3 | 4 | def test_manager(): 5 | env_config = { 6 | 'name': 'base', 7 | 'tool_manager': 'qwen3', 8 | 'mcp_mode': 'sse', 9 | 'config_path': 'envs/configs/sse_mcp_tools.pydata', 10 | 'enable_thinking': True, 11 | 'max_prompt_length': 2048, 12 | } 13 | manager = QwenManager(env_config) 14 | print('Tools:') 15 | for tool_name, tool in manager.all_tools.items(): 16 | print(' - tool name: {}'.format(tool_name)) 17 | 18 | for func in manager.tool_map.values(): 19 | print(func.function) 20 | 21 | 22 | if __name__ == '__main__': 23 | test_manager() 24 | -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def test_import(): 17 | import verl 18 | 19 | print(verl.__version__) 20 | 21 | 22 | def test_single_controller_import(): 23 | import verl.single_controller 24 | 25 | print(verl.single_controller.__version__) 26 | -------------------------------------------------------------------------------- /tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | 
trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /tests/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Tests for the trainer module. 16 | """ -------------------------------------------------------------------------------- /tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Tests for the PPO trainer module. 16 | """ 17 | -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # Test module for import_utils.load_extern_type testing 17 | class TestClass: 18 | """A test class to be imported by load_extern_type""" 19 | 20 | def __init__(self, value=None): 21 | self.value = value or "default" 22 | 23 | def get_value(self): 24 | return self.value 25 | 26 | 27 | TEST_CONSTANT = "test_constant_value" 28 | 29 | 30 | def test_function(): 31 | return "test_function_result" 32 | -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from verl.utils import hf_tokenizer 17 | from verl.utils.dataset.rm_dataset import RMDataset 18 | 19 | 20 | def get_rm_data(): 21 | # prepare test dataset 22 | local_folder = os.path.expanduser("~/verl-data/full_hh_rlhf/rm/") 23 | local_path = os.path.join(local_folder, "test.parquet") 24 | os.makedirs(local_folder, exist_ok=True) 25 | return local_path 26 | 27 | 28 | def test_rm_dataset(): 29 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 30 | local_path = get_rm_data() 31 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 32 | data = dataset[0]["input_ids"] 33 | output = tokenizer.batch_decode(data) 34 | assert len(output) > 1 35 | assert isinstance(output[0], str) 36 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | 16 | from transformers import AutoTokenizer 17 | from verl.utils import hf_tokenizer 18 | from verl.utils.dataset.rm_dataset import RMDataset 19 | 20 | 21 | def get_rm_data(): 22 | # prepare test dataset 23 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet" 24 | local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/') 25 | local_path = os.path.join(local_folder, 'test.parquet') 26 | os.makedirs(local_folder, exist_ok=True) 27 | return local_path 28 | 29 | 30 | def test_rm_dataset(): 31 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 32 | local_path = get_rm_data() 33 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 34 | data = dataset[0]['input_ids'] 35 | output = tokenizer.batch_decode(data) 36 | assert len(output) > 1 37 | assert isinstance(output[0], str) 38 | -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | from .protocol import DataProto 19 | from .utils.logging_utils import set_basic_config 20 | 21 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 22 | 23 | with open(os.path.join(version_folder, "version/version")) as f: 24 | __version__ = f.read().strip() 25 | 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | 30 | __all__ = ["DataProto", "__version__"] 31 | 32 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true": 33 | import importlib 34 | 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`") 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | 40 | patch_hub() 41 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common model zoos such as huggingface/transformers struggle with PyTorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized model implementation with packed inputs in verl. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask; a minimal sketch of such a packed-input forward follows below.
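The sketch below illustrates what such a packed-input attention forward can look like. It is a minimal, hypothetical example rather than verl's actual implementation: the `PackedSelfAttention` module and its parameter names are invented for illustration, and it assumes the `flash-attn` package (its `flash_attn_varlen_func` varlen kernel) is installed and that inputs are fp16/bf16 CUDA tensors.

```python
# Hypothetical sketch of a padding-free (packed-input) attention forward.
# Not verl's actual model code; module and parameter names are illustrative only.
import torch
import torch.nn as nn
from flash_attn import flash_attn_varlen_func  # assumes flash-attn >= 2.x is installed


class PackedSelfAttention(nn.Module):
    """Causal self-attention over packed sequences (no padding tokens)."""

    def __init__(self, hidden_size: int, num_heads: int):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        self.qkv_proj = nn.Linear(hidden_size, 3 * hidden_size, bias=False)
        self.o_proj = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, max_seqlen_in_batch: int) -> torch.Tensor:
        # hidden_states: (total_nnz, hidden_size) -- every sequence in the batch
        # concatenated back to back with padding removed.
        # cu_seqlens: int32 cumulative sequence lengths marking sequence boundaries.
        total_nnz = hidden_states.shape[0]
        q, k, v = self.qkv_proj(hidden_states).chunk(3, dim=-1)
        q = q.view(total_nnz, self.num_heads, self.head_dim)
        k = k.view(total_nnz, self.num_heads, self.head_dim)
        v = v.view(total_nnz, self.num_heads, self.head_dim)
        # The varlen kernel applies causal attention to each packed sequence
        # independently, so tokens never attend across sequence boundaries.
        attn_out = flash_attn_varlen_func(
            q, k, v,
            cu_seqlens_q=cu_seqlens,
            cu_seqlens_k=cu_seqlens,
            max_seqlen_q=max_seqlen_in_batch,
            max_seqlen_k=max_seqlen_in_batch,
            causal=True,
        )
        return self.o_proj(attn_out.reshape(total_nnz, -1))
```

Because the kernel receives `cu_seqlens` directly, no attention-mask tensor is materialized and no compute is spent on padding, which is the point of the packed-input rewrite described in this step.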
15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Following the infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | ParallelLlamaForCausalLM, 17 | # rmpad with megatron 18 | ParallelLlamaForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelLlamaForCausalLMRmPadPP, 21 | ParallelLlamaForValueRmPad, 22 | ParallelLlamaForValueRmPadPP, 23 | # original model with megatron 24 | ParallelLlamaModel, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelLlamaForCausalLM", 29 | "ParallelLlamaForCausalLMRmPad", 30 | "ParallelLlamaForCausalLMRmPadPP", 31 | "ParallelLlamaForValueRmPad", 32 | "ParallelLlamaForValueRmPadPP", 33 | "ParallelLlamaModel", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_linear import ( 18 | LinearForLastLayer, 19 | MergedColumnParallelLinear, 20 | QKVParallelLinear, 21 | ) 22 | from .parallel_mlp import ParallelLlamaMLP 23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 24 | 25 | __all__ = ["LinearForLastLayer", "MergedColumnParallelLinear", "QKVParallelLinear", "ParallelLlamaAttention", "ParallelLlamaDecoderLayer", "ParallelLlamaDecoderLayerRmPad", "ParallelLlamaMLP", "ParallelLlamaRMSNorm"] 26 | -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .registry import get_mcore_forward_fn, get_mcore_weight_converter, hf_to_mcore_config, init_mcore_model 17 | 18 | __all__ = ["hf_to_mcore_config", "init_mcore_model", "get_mcore_forward_fn", "get_mcore_weight_converter"] 19 | -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | ParallelQwen2ForCausalLM, 17 | # rmpad with megatron 18 | ParallelQwen2ForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelQwen2ForCausalLMRmPadPP, 21 | ParallelQwen2ForValueRmPad, 22 | ParallelQwen2ForValueRmPadPP, 23 | # original model with megatron 24 | ParallelQwen2Model, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelQwen2ForCausalLM", 29 | "ParallelQwen2ForCausalLMRmPad", 30 | "ParallelQwen2ForCausalLMRmPadPP", 31 | "ParallelQwen2ForValueRmPad", 32 | "ParallelQwen2ForValueRmPadPP", 33 | "ParallelQwen2Model", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | 20 | __all__ = ["ParallelQwen2Attention", "ParallelQwen2DecoderLayer", "ParallelQwen2DecoderLayerRmPad", "ParallelQwen2MLP", "ParallelQwen2RMSNorm"] 21 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from . 
import base 17 | from .base import * 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | # Note(haibin.lin): single_controller.__version__ is deprecated 22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f: 23 | __version__ = f.read().strip() 24 | 25 | 26 | __all__ = base.__all__ 27 | -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"] 19 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | import ray 18 | 19 | 20 | @ray.remote 21 | class WorkerGroupRegisterCenter: 22 | def __init__(self, rank_zero_info): 23 | self.rank_zero_info = rank_zero_info 24 | # rank -> node_id 25 | self.workers_info: Dict[int, str] = {} 26 | 27 | def get_rank_zero_info(self): 28 | return self.rank_zero_info 29 | 30 | def set_worker_info(self, rank, node_id) -> None: 31 | self.workers_info[rank] = node_id 32 | 33 | def get_worker_info(self) -> Dict[int, str]: 34 | return self.workers_info 35 | 36 | 37 | def create_worker_group_register_center(name, info): 38 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 39 | -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup, create_colocated_worker_cls, create_colocated_worker_cls_fused 16 | 17 | __all__ = ["RayClassWithInitArgs", "RayResourcePool", "RayWorkerGroup", "create_colocated_worker_cls", "create_colocated_worker_cls_fused"] 18 | -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
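The `WorkerGroupRegisterCenter` actor in `register_center/ray.py` above is a small named rendezvous point: the driver publishes rank-zero info once, and each worker registers its rank-to-node-id mapping so the group can be inspected later. Below is a minimal usage sketch; the actor name, the payload dictionary, and the single hard-coded worker registration are illustrative assumptions, not code taken from this repository.

```python
import ray

from verl.single_controller.base.register_center.ray import create_worker_group_register_center

ray.init()

# Driver side: publish rank-zero info under a well-known actor name
# (the name and the payload shape here are assumed values).
center = create_worker_group_register_center(
    name="demo_register_center",
    info={"MASTER_ADDR": "127.0.0.1", "MASTER_PORT": "29500"},
)

# Worker side: any process in the same Ray cluster can resolve the actor by name
# and report which node it landed on.
worker_center = ray.get_actor("demo_register_center")
ray.get(worker_center.set_worker_info.remote(1, ray.get_runtime_context().get_node_id()))

print(ray.get(center.get_rank_zero_info.remote()))  # {'MASTER_ADDR': '127.0.0.1', 'MASTER_PORT': '29500'}
print(ray.get(center.get_worker_info.remote()))  # {1: '<node id of the caller>'}
```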
14 | -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights: 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights: 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import Optional 17 | 18 | from transformers import PreTrainedTokenizer 19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup 20 | from vllm.utils import LRUCache 21 | 22 | 23 | class TokenizerGroup(TokenizerGroup): 24 | """A group of tokenizers that can be used for LoRA adapters.""" 25 | 26 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, max_input_length: Optional[int]): 27 | self.enable_lora = enable_lora 28 | self.max_input_length = max_input_length 29 | self.tokenizer = tokenizer 30 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 31 | 32 | # FIXME(sgm): for simplicity, we assign the special token here 33 | @property 34 | def pad_token_id(self): 35 | return self.tokenizer.pad_token_id 36 | 37 | @property 38 | def eos_token_id(self): 39 | return self.tokenizer.eos_token_id 40 | -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_init: 13 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 
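The `custom_reward_function` block in `evaluation.yaml` above lets the evaluation entry point load a scoring function from a user-supplied file instead of the built-in scorers. A minimal sketch of such a file is shown below; the module name `my_reward.py`, the exact `(data_source, solution_str, ground_truth, extra_info)` argument list, and the substring-match rule are assumptions for illustration, so the real call contract should be checked against the script that consumes this config. Setting `path` to the location of this file and keeping `name: compute_score` would then route scoring through it.

```python
# my_reward.py, a hypothetical module that custom_reward_function.path could point to,
# with custom_reward_function.name left at its default value "compute_score".


def compute_score(data_source, solution_str, ground_truth, extra_info=None) -> float:
    """Toy rule-based reward: full credit if the ground truth appears verbatim in the response."""
    if ground_truth is None:
        return 0.0
    return 1.0 if str(ground_truth).strip() in solution_str else 0.0
```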
-------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | mode: sync # sync: LLM, async: AsyncLLM 18 | temperature: 1.0 19 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 20 | top_p: 0.7 21 | prompt_length: 1536 22 | response_length: 512 23 | # for vllm rollout 24 | dtype: bfloat16 # should align with FSDP 25 | gpu_memory_utilization: 0.5 26 | ignore_eos: False 27 | enforce_eager: True 28 | free_cache_engine: True 29 | load_format: dummy_dtensor 30 | tensor_model_parallel_size: 1 31 | max_num_batched_tokens: 8192 32 | max_model_len: null 33 | max_num_seqs: 1024 34 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 35 | log_prob_micro_batch_size_per_gpu: 8 36 | # for fire vllm rollout 37 | use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236 38 | # for hf rollout 39 | do_sample: True 40 | disable_log_stats: True 41 | enable_chunked_prefill: True 42 | n: 1 43 | actor: 44 | strategy: fsdp # This is for backward-compatibility 45 | ulysses_sequence_parallel_size: 1 # sp size 46 | fsdp_config: 47 | fsdp_size: -1 48 | 49 | ray_init: 50 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 51 | -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | # Single-turn settings 8 | prompt_key: question 9 | response_key: answer 10 | prompt_dict_keys: ['question'] 11 | response_dict_keys: ['answer'] 12 | # Multi-turn settings 13 | multiturn: 14 | enable: false # Set to true to use multi-turn dataset 15 | messages_key: messages # Key for messages list in multi-turn mode 16 | max_length: 1024 17 | truncation: error 18 | balance_dp_token: False 19 | chat_template: null 20 | custom_cls: 21 | path: null 22 | name: null 23 | model: 24 | partial_pretrain: ~/models/gemma-1.1-7b-it 25 | fsdp_config: 26 | wrap_policy: 27 | min_num_params: 0 28 | cpu_offload: False 29 | offload_params: False 30 | external_lib: null 31 | enable_gradient_checkpointing: False 32 | trust_remote_code: False 33 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 34 | lora_alpha: 16 # LoRA scaling factor 35 | target_modules: all-linear # Target modules for LoRA adaptation 36 | use_liger: False 37 | optim: 38 | lr: 1e-5 39 | betas: [0.9, 0.95] 40 | weight_decay: 0.01 41 | warmup_steps_ratio: 0.1 42 | clip_grad: 1.0 43 | lr_scheduler: cosine 44 | ulysses_sequence_parallel_size: 1 45 | use_remove_padding: False 46 | trainer: 47 | default_local_dir: /tmp/sft_model 48 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 49 | resume_path: null 50 | project_name: 
gsm8k-sft 51 | experiment_name: test 52 | total_epochs: 4 53 | total_training_steps: null 54 | logger: ['console'] 55 | seed: 1 56 | 57 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs: 6 | # VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_processor, hf_tokenizer 17 | 18 | __all__ = tokenizer.__all__ + ["hf_processor", "hf_tokenizer"] 19 | -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We organize the prompts directly in the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answers in a particular format so that we can extract them. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | 19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"] 20 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import GPUMemoryLogger, log_gpu_memory_usage 16 | 17 | __all__ = ["GPUMemoryLogger", "log_gpu_memory_usage"] 18 | -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | 16 | import os 17 | 18 | 19 | def initialize_global_process_group(timeout_second=36000): 20 | from datetime import timedelta 21 | 22 | import torch.distributed 23 | 24 | torch.distributed.init_process_group("nccl", timeout=timedelta(seconds=timeout_second)) 25 | local_rank = int(os.environ["LOCAL_RANK"]) 26 | rank = int(os.environ["RANK"]) 27 | world_size = int(os.environ["WORLD_SIZE"]) 28 | 29 | if torch.distributed.is_initialized(): 30 | torch.cuda.set_device(local_rank) 31 | return local_rank, rank, world_size 32 | -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | import torch 19 | 20 | 21 | def set_basic_config(level): 22 | """ 23 | This function sets the global logging format and level. It will be called when import verl 24 | """ 25 | logging.basicConfig(format="%(levelname)s:%(asctime)s:%(message)s", level=level) 26 | 27 | 28 | def log_to_file(string): 29 | print(string) 30 | if os.path.isdir("logs"): 31 | with open(f"logs/log_{torch.distributed.get_rank()}", "a+") as f: 32 | f.write(string + "\n") 33 | -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | 17 | 18 | class MemoryBuffer: 19 | def __init__(self, numel, numel_padded, dtype): 20 | self.numel = numel 21 | self.numel_padded = numel_padded 22 | self.dtype = dtype 23 | self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=torch.cuda.current_device(), requires_grad=False) 24 | 25 | def zero(self): 26 | """Reset the buffer to zero.""" 27 | self.data.zero_() 28 | 29 | def get(self, shape, start_index): 30 | """Return a tensor with the input `shape` as a view into the 31 | 1-D data starting at `start_index`.""" 32 | end_index = start_index + shape.numel() 33 | assert end_index <= self.numel, "requested tensor is out of the buffer range." 34 | buffer_tensor = self.data[start_index:end_index] 35 | buffer_tensor = buffer_tensor.view(shape) 36 | return buffer_tensor 37 | -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from megatron.core.optimizer import OptimizerConfig 17 | from megatron.core.optimizer import get_megatron_optimizer as get_megatron_optimizer_native 18 | 19 | 20 | def get_megatron_optimizer( 21 | model, 22 | config: OptimizerConfig, 23 | no_weight_decay_cond=None, 24 | scale_lr_cond=None, 25 | lr_mult=1.0, 26 | ): 27 | # Base optimizer. 28 | return get_megatron_optimizer_native( 29 | config=config, 30 | model_chunks=model, 31 | no_weight_decay_cond=no_weight_decay_cond, 32 | scale_lr_cond=scale_lr_cond, 33 | lr_mult=lr_mult, 34 | ) 35 | 36 | 37 | # TODO: add get_optimizer_param_scheduler(optimizer) to implement lr scheuler. 38 | -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import reduce_metrics 16 | 17 | __all__ = ["reduce_metrics"] 18 | -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import concurrent.futures 19 | 20 | import ray 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | def put_data(index, data): 25 | return index, ray.put(data) 26 | 27 | if max_workers is None: 28 | max_workers = min(len(data_list), 16) 29 | 30 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 31 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 32 | res_lst = [] 33 | for future in concurrent.futures.as_completed(data_list_f): 34 | res_lst.append(future.result()) 35 | 36 | # reorder based on index 37 | output = [None for _ in range(len(data_list))] 38 | for res in res_lst: 39 | index, data_ref = res 40 | output[index] = data_ref 41 | 42 | return output 43 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | 17 | from mathruler.grader import extract_boxed_content, grade_answer 18 | 19 | 20 | def format_reward(predict_str: str) -> float: 21 | pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL) 22 | match_result = re.fullmatch(pattern, predict_str) 23 | return 1.0 if match_result else 0.0 24 | 25 | 26 | def acc_reward(predict_str: str, ground_truth: str) -> float: 27 | answer = extract_boxed_content(predict_str) 28 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 29 | 30 | 31 | def compute_score(predict_str: str, ground_truth: str) -> float: 32 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 33 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Individual Contributor: Mert Unsal 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .math import compute_score 16 | 17 | 18 | def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos): 19 | """ 20 | This is a demonstration of what a batched reward function should look like. 21 | Typically, you want to use a batched reward function to speed up scoring through parallelization. 22 | """ 23 | return [compute_score(solution_str, ground_truth) for solution_str, ground_truth in zip(solution_strs, ground_truths)] 24 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | try: 16 | from math_verify.errors import TimeoutException 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> float: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | try: 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | except TimeoutException: 35 | ret_score = timeout_score 36 | except Exception: 37 | pass 38 | 39 | return ret_score 40 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.3.1.dev 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | import torch 21 | 22 | from verl import DataProto 23 | 24 | __all__ = ["BasePPOCritic"] 25 | 26 | 27 | class BasePPOCritic(ABC): 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .batch import BatchRewardManager 16 | from .dapo import DAPORewardManager 17 | from .naive import NaiveRewardManager 18 | from .prime import PrimeRewardManager 19 | from .parallel import AsyncRewardManager 20 | 21 | __all__ = ["BatchRewardManager", "DAPORewardManager", "NaiveRewardManager", "PrimeRewardManager", "AsyncRewardManager"] 22 | -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | 17 | __all__ = ["BasePPORewardModel"] 18 | -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for reward model 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | from verl import DataProto 21 | 22 | 23 | class BasePPORewardModel(ABC): 24 | def __init__(self, config): 25 | self.config = config 26 | 27 | @abstractmethod 28 | def compute_reward(self, data: DataProto) -> DataProto: 29 | """Computing reward given input_ids. The transformers should output a tensor with shape 30 | [batch_size, sequence_length], and the value at [EOS] mask should be gathered. 31 | 32 | Args: 33 | data: must contain keys "input_ids", "attention_mask" and "position_ids". 34 | - input_ids: [batch_size, sequence_length] 35 | - attention_mask: [batch_size, sequence_length] 36 | - position_ids: [batch_size, sequence_length] 37 | 38 | Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward. 39 | Other position should have zero reward. Note that this may change in the future if we use 40 | dense reward. So, we leave the interface for general case. 41 | - reward: [batch_size, sequence_length]. 42 | 43 | """ 44 | pass 45 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
-------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | 17 | __all__ = ["MegatronRewardModel"] 18 |
-------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .hf_rollout import HFRollout 17 | from .naive import NaiveRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 |
-------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | 17 | from verl import DataProto 18 | 19 | __all__ = ["BaseRollout"] 20 | 21 | 22 | class BaseRollout(ABC): 23 | def __init__(self): 24 | """ 25 | 26 | Args: 27 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 28 | should handle termination when training stops. 29 | """ 30 | super().__init__() 31 | 32 | @abstractmethod 33 | def generate_sequences(self, prompts: DataProto) -> DataProto: 34 | """Generate sequences""" 35 | pass 36 |
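The rollout interface is intentionally small: a rollout takes a `DataProto` of prompts and returns a `DataProto` of generated sequences. A trivial, hypothetical subclass that only illustrates this contract (it is not one of the shipped backends such as `HFRollout` or the vLLM/SGLang rollouts) might look like:

```python
from verl import DataProto
from verl.workers.rollout.base import BaseRollout


class EchoRollout(BaseRollout):
    """Toy rollout that returns the prompts unchanged, for interface illustration only."""

    def generate_sequences(self, prompts: DataProto) -> DataProto:
        # A real backend would run the policy model here and append the generated
        # responses (token ids, log-probs, attention masks) to the returned DataProto.
        return prompts
```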
-------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | 17 | __all__ = ["NaiveRollout"] 18 |
-------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .async_sglang_rollout import AsyncSGLangRollout 16 | from .sglang_rollout import SGLangRollout 17 | 18 | __all__ = ["AsyncSGLangRollout", "SGLangRollout"] 19 |
-------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |
-------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __enter__(self): 23 | pass 24 | 25 | def __exit__(self, exc_type, exc_value, traceback): 26 | pass 27 | 28 | def preprocess_data(self, data: DataProto) -> DataProto: 29 | return data 30 | 31 | def postprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 |
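A sharding manager is driven as a context manager around rollout generation: entering the context is where a concrete subclass reshards or reloads weights into the inference engine's layout, and the pre/post hooks convert the data between training and inference layouts. The following sketch shows that calling pattern under stated assumptions; the function name and the `rollout` object are illustrative, not a verbatim excerpt from verl's trainer.

```python
from verl import DataProto
from verl.workers.sharding_manager.base import BaseShardingManager


def rollout_with_resharding(sharding_manager: BaseShardingManager, rollout, prompts: DataProto) -> DataProto:
    # Entering the context is where a concrete manager gathers / reshards the
    # training weights into the inference engine before generation starts.
    with sharding_manager:
        prompts = sharding_manager.preprocess_data(prompts)   # e.g. all-gather prompts across DP ranks
        output = rollout.generate_sequences(prompts)
        output = sharding_manager.postprocess_data(output)    # e.g. chunk results back per rank
    return output
```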
-------------------------------------------------------------------------------- /webui/README.md: -------------------------------------------------------------------------------- 1 | # RL Factory WebUI 2 | 3 | This is the Web User Interface for RL Factory, built with Gradio. The interface provides an intuitive way to manage all aspects of reinforcement learning experiments. 4 | 5 | ## Feature Modules 6 | 7 | The WebUI includes the following five main modules: 8 | 9 | 1. **Data Processing** – For managing and processing experiment data 10 | 2. **Tool Definition** – For defining and managing experiment tools 11 | 3. **Reward Definition** – For defining and managing reward functions and graders 12 | 4. **Training & Deployment** – For training models and deploying experiments 13 | 5. **Project Management** – For managing experiment projects and resources 14 | 15 | ## Installation 16 | 17 | 1. Make sure all dependencies are installed: 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 2. Run the application: 23 | ```bash 24 | python app.py 25 | ``` 26 | 27 | The application will start at http://localhost:7860. 28 | 29 | ## Development Notes 30 | 31 | - Each feature module is implemented as a separate tab in `app.py` 32 | - The interface is built using Gradio's Blocks API 33 | - All components support real-time updates and interaction 34 | 35 | ## Notes 36 | 37 | - Ensure all necessary dependencies are installed before running the application 38 | - The default port is 7860, which can be modified in `app.py` 39 | - Debug mode is enabled during development; please disable it for production deployment
-------------------------------------------------------------------------------- /webui/app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from tabs import ( 3 | create_data_processing_tab, 4 | create_tool_definition_tab, 5 | create_reward_definition_tab, 6 | create_training_deployment_tab, 7 | create_project_management_tab 8 | ) 9 | 10 | def create_app(): 11 | """Create the main application 12 | 13 | Assembles all tab modules into the complete WebUI application. 14 | Each tab is an independent module, which keeps it easy to maintain and extend. 15 | """ 16 | with gr.Blocks(title="RL Factory WebUI") as app: 17 | gr.Markdown("# RL Factory WebUI") 18 | gr.Markdown(""" 19 | Welcome to the RL Factory WebUI, a tool for managing reinforcement learning experiments. 20 | """) 21 | 22 | with gr.Tabs() as tabs: 23 | with gr.TabItem("Data Processing"): 24 | create_data_processing_tab() 25 | with gr.TabItem("Tool Definition"): 26 | create_tool_definition_tab() 27 | with gr.TabItem("Reward Definition"): 28 | create_reward_definition_tab() 29 | with gr.TabItem("Training & Deployment"): 30 | create_training_deployment_tab() 31 | with gr.TabItem("Project Management"): 32 | create_project_management_tab() 33 | 34 | return app 35 | 36 | if __name__ == "__main__": 37 | app = create_app() 38 | app.launch( 39 | server_name="0.0.0.0", 40 | server_port=7860, 41 | share=False, 42 | debug=True 43 | )
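As the README's development notes say, each feature module is a standalone tab wired into `app.py`. A minimal sketch of adding a new tab following that pattern is shown below; the module name `evaluation.py`, the function `create_evaluation_tab`, and the tab contents are hypothetical, not existing code.

```python
# webui/tabs/evaluation.py  (hypothetical new module, mirroring the existing tab pattern)
import gradio as gr

def create_evaluation_tab():
    """Evaluation tab (illustrative only)."""
    with gr.Blocks() as tab:
        gr.Markdown("# Evaluation")
        gr.Markdown("Compare checkpoints and inspect reward curves here.")
    return tab

# To register it:
# 1. export it from webui/tabs/__init__.py:
#        from .evaluation import create_evaluation_tab
# 2. add it inside the gr.Tabs() block in webui/app.py:
#        with gr.TabItem("Evaluation"):
#            create_evaluation_tab()
```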
-------------------------------------------------------------------------------- /webui/components/rewards/graders/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict, Type, Any 3 | 4 | 5 | class BaseGrader(ABC): 6 | """Base class for reward graders""" 7 | 8 | @abstractmethod 9 | def grade(self, prediction: Any, reference: Any) -> float: 10 | """Grade a prediction 11 | 12 | Args: 13 | prediction: the predicted value 14 | reference: the reference value 15 | 16 | Returns: 17 | float: the score (between 0 and 1) 18 | """ 19 | pass 20 | 21 | @property 22 | @abstractmethod 23 | def name(self) -> str: 24 | """Grader name""" 25 | pass 26 | 27 | @property 28 | @abstractmethod 29 | def description(self) -> str: 30 | """Grader description""" 31 | pass 32 | 33 | 34 | class GraderRegistry: 35 | """Registry of graders""" 36 | 37 | _registry: Dict[str, Type[BaseGrader]] = {} 38 | 39 | @classmethod 40 | def register(cls, grader_class: Type[BaseGrader]) -> Type[BaseGrader]: 41 | """Register a grader 42 | 43 | Args: 44 | grader_class: the grader class 45 | 46 | Returns: 47 | the registered grader class 48 | """ 49 | cls._registry[grader_class.name] = grader_class 50 | return grader_class 51 | 52 | @classmethod 53 | def get(cls, name: str) -> Type[BaseGrader]: 54 | """Get a grader by name 55 | 56 | Args: 57 | name: the grader name 58 | 59 | Returns: 60 | the grader class 61 | """ 62 | if name not in cls._registry: 63 | raise KeyError(f"No grader named {name} is registered") 64 | return cls._registry[name] 65 | 66 | @classmethod 67 | def list_graders(cls) -> Dict[str, str]: 68 | """List all registered graders 69 | 70 | Returns: 71 | Dict[str, str]: a mapping from grader name to description 72 | """ 73 | return {name: grader.description for name, grader in cls._registry.items()} 74 |
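A short usage sketch of the registry follows. `ExactMatchGrader` is hypothetical; note that `GraderRegistry.register` reads `grader_class.name` on the class itself, so the example defines `name` and `description` as plain class attributes (rather than properties) so that the class-level lookup yields strings. The import path assumes the `webui/` directory is the working directory, as when running `python3 app.py`.

```python
from components.rewards.graders import BaseGrader, GraderRegistry  # assumed import path


@GraderRegistry.register
class ExactMatchGrader(BaseGrader):
    # Plain class attributes so grader_class.name / .description are real strings
    # when register() indexes the registry on the class object.
    name = "exact_match"
    description = "Returns 1.0 if the prediction matches the reference exactly, else 0.0"

    def grade(self, prediction, reference) -> float:
        return 1.0 if str(prediction).strip() == str(reference).strip() else 0.0


grader_cls = GraderRegistry.get("exact_match")
print(grader_cls().grade("42", "42"))   # 1.0
print(GraderRegistry.list_graders())    # {'exact_match': 'Returns 1.0 if ...'}
```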
-------------------------------------------------------------------------------- /webui/components/rewards/graders/graders.py: -------------------------------------------------------------------------------- 1 | """Grader import module; importing it ensures all graders are registered.""" 2 | 3 | from .qwen_math import QwenMathGrader 4 | 5 | __all__ = ['QwenMathGrader']
-------------------------------------------------------------------------------- /webui/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.19.2 2 | fastapi>=0.109.0 3 | uvicorn>=0.27.0 4 | python-multipart>=0.0.9 5 | pydantic>=2.6.1
-------------------------------------------------------------------------------- /webui/run_webui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Resolve the absolute path of the directory containing this script 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | # Install dependencies 8 | # echo "Installing dependencies..." 9 | # pip install -r requirements.txt 10 | 11 | # Launch the WebUI 12 | echo "Starting RL Factory WebUI..." 13 | echo "The service will start at http://localhost:7860" 14 | echo "Press Ctrl+C to stop the service" 15 | 16 | python3 app.py
-------------------------------------------------------------------------------- /webui/tabs/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_processing import create_data_processing_tab 2 | from .tool_definition import create_tool_definition_tab 3 | from .reward_definition import create_reward_definition_tab 4 | from .training_deployment import create_training_deployment_tab 5 | from .project_management import create_project_management_tab 6 | 7 | __all__ = [ 8 | 'create_data_processing_tab', 9 | 'create_tool_definition_tab', 10 | 'create_reward_definition_tab', 11 | 'create_training_deployment_tab', 12 | 'create_project_management_tab' 13 | ]
-------------------------------------------------------------------------------- /webui/tabs/data_processing.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_data_processing_tab(): 4 | """Data Processing tab 5 | 6 | This tab is used to manage and process experiment data, including: 7 | - Data import and export 8 | - Data preprocessing 9 | - Data visualization 10 | - Dataset management 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Data Processing") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Import and export experiment data 18 | - Preprocess and transform data 19 | - View data visualization results 20 | - Manage experiment datasets 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/project_management.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_project_management_tab(): 4 | """Project Management tab 5 | 6 | This tab is used to manage experiment projects and resources, including: 7 | - Project management 8 | - Resource monitoring 9 | - Experiment records 10 | - Result analysis 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Project Management") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Manage experiment projects 18 | - Monitor system resources 19 | - Record experiment runs 20 | - Analyze experiment results 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/tool_definition.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_tool_definition_tab(): 4 | """Tool Definition tab 5 | 6 | This tab is used to define and manage experiment tools, including: 7 | - Tool configuration 8 | - Tool registration 9 | - Tool testing 10 | - Tool documentation 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Tool Definition") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Configure the tools required for experiments 18 | - Register new tools 19 | - Test tool functionality 20 | - View tool documentation 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/training_deployment.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_training_deployment_tab(): 4 | """Training & Deployment tab 5 | 6 | This tab is used to train models and deploy experiments, including: 7 | - Training configuration 8 | - Training monitoring 9 | - Model deployment 10 | - Experiment evaluation 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Training & Deployment") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Configure training parameters 18 | - Monitor the training process 19 | - Deploy trained models 20 | - Evaluate experiment results 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_11d607b7-be32-4947-9087-88f808616b56_30594.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 46189, 3 | "iopub_port": 34123, 4 | "stdin_port": 36193, 5 | "control_port": 32895, 6 | "hb_port": 36215, 7 | "ip": "127.0.0.1", 8 | "key": "e2c14147-c168d7a4bfbd70bf7c43dc9b", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 38489, 3 | "iopub_port": 37727, 4 | "stdin_port": 37469, 5 | "control_port": 34083, 6 | "hb_port": 35269, 7 | "ip": "127.0.0.1", 8 | "key": "2863fae1-3c42df3283f8ef7a517e338f", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 44029, 3 | "iopub_port": 41935, 4 | "stdin_port": 42815, 5 | "control_port": 34975, 6 | "hb_port": 49955, 7 | "ip": "127.0.0.1", 8 | "key": "72406bee-dde69fc444aa49e9fb53a771", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 33685, 3 | "iopub_port": 33521, 4 | "stdin_port": 46845, 5 | "control_port": 33759, 6 | "hb_port": 46539, 7 | "ip": "127.0.0.1", 8 | "key": "c1593018-1aaacb1c0422abc6cae4a134", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 |
"kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_67ad306a-e335-4294-b241-514085b015a3_6550.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 39877, 3 | "iopub_port": 36447, 4 | "stdin_port": 45291, 5 | "control_port": 46463, 6 | "hb_port": 55285, 7 | "ip": "127.0.0.1", 8 | "key": "75a02734-a0b6dea9b859d2e45f8d29a1", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 35669, 3 | "iopub_port": 42719, 4 | "stdin_port": 39277, 5 | "control_port": 34731, 6 | "hb_port": 37443, 7 | "ip": "127.0.0.1", 8 | "key": "771f30ef-c013140eaf7f6fa165404952", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 35535, 3 | "iopub_port": 40593, 4 | "stdin_port": 34717, 5 | "control_port": 44503, 6 | "hb_port": 40187, 7 | "ip": "127.0.0.1", 8 | "key": "fa3bde5d-b9b9e82f4d83612ec4f29d20", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 38917, 3 | "iopub_port": 44457, 4 | "stdin_port": 36495, 5 | "control_port": 41571, 6 | "hb_port": 58689, 7 | "ip": "127.0.0.1", 8 | "key": "ae264de6-d8099f5dedac85249db6aafe", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_11d607b7-be32-4947-9087-88f808616b56_30594.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.py: -------------------------------------------------------------------------------- 1 | 2 
| from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_67ad306a-e335-4294-b241-514085b015a3_6550.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | --------------------------------------------------------------------------------
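The `kernel_connection_file_*.json` files above are standard Jupyter kernel connection files (ZMQ ports, HMAC key, transport), and each `launch_kernel_*.py` simply starts an ipykernel instance. A hedged sketch of how such a kernel could be driven from Python with `jupyter_client` is shown below; the connection-file path is illustrative, and this is not how RL Factory's code interpreter tool is necessarily implemented.

```python
from jupyter_client import BlockingKernelClient

client = BlockingKernelClient()
# Point the client at one of the connection files written when the kernel was launched.
client.load_connection_file("workspace/tools/code_interpreter/kernel_connection_file_<id>.json")
client.start_channels()

# Execute code on the already-running kernel and wait for the reply.
reply = client.execute_interactive("print(1 + 1)")
print(reply["content"]["status"])  # "ok" on success
```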