├── .gitignore ├── LICENSE ├── README.md ├── assets └── rl-factory.png ├── docker ├── Apptainerfile.rocm ├── Dockerfile.megatron ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang ├── Dockerfile.vemlp.vllm.te ├── Dockerfile.vllm.sglang.megatron └── Dockfile.ngc.vllm0.8 ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ ├── js │ │ └── runllm-widget.js │ └── logo.png ├── advance │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ ├── placement.rst │ └── rope.rst ├── amd_tutorial │ ├── amd_build_dockerfile.md │ ├── amd_build_dockerfile_page.rst │ ├── amd_existing_docker.md │ └── amd_vllm_page.rst ├── api │ ├── trainer.rst │ └── utils.rst ├── conf.py ├── data.rst ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ ├── multi_modal_example.rst │ ├── ppo_code_architecture.rst │ └── sandbox_fusion_example.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ ├── device_tuning.rst │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── rl_factory │ ├── en │ │ ├── centralized_tool_manager.md │ │ ├── framework_design.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md │ ├── main_tutorial.md │ ├── main_tutorial_zh.md │ └── zh │ │ ├── README.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md ├── sglang_multiturn │ └── multiturn.rst ├── start │ ├── install.rst │ ├── multinode.rst │ ├── quickstart.rst │ └── ray_debug_tutorial.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ ├── ray_trainer.rst │ └── sglang_worker.rst ├── envs ├── __init__.py ├── base.py ├── configs │ ├── calculator.json │ ├── mcp_tools.pydata │ └── sse_mcp_tools.pydata ├── reward_rollout_example.py ├── search.py ├── tool_manager │ ├── __init__.py │ ├── base_manager.py │ ├── config_manager.py │ ├── qwen2_5_manager.py │ └── qwen3_manager.py ├── tools │ └── search.py └── utils │ ├── get_prompt.py │ ├── mcp_manager.py │ ├── schema.py │ ├── tool_utils.py │ └── util.py ├── examples ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ └── run_qwen_megatron_ckpt.sh ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── gsm8k_multiturn_w_tool.py │ ├── hellaswag.py │ ├── math_dataset.py │ └── multiturn.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sgl_megatron.sh │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ ├── run_qwen2_5_vl-7b.sh │ └── run_qwen3-8b.sh ├── ppo_trainer │ ├── naive_chat_scheduler.py │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── 
run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── rl_factory │ └── reward_rollout_test.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ └── multiturn │ │ └── run_qwen_05_sp2.sh ├── sglang_multiturn │ ├── README.md │ ├── config │ │ ├── gsm8k_multiturn_grpo.yaml │ │ └── tool_config │ │ │ └── gsm8k_tool_config.yaml │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ └── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh ├── slurm │ └── ray_on_slurm.slurm ├── split_placement │ ├── README.md │ ├── config │ │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py └── tuning │ ├── 14b │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ ├── 32b │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ ├── 70b │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ └── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ └── 7b │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── generator ├── __init__.py ├── api_generator.py └── base_generator.py ├── install.sh ├── main_grpo.sh ├── main_ppo.sh ├── pyproject.toml ├── rag_server ├── README.md ├── data_process │ └── nq_search.py ├── download.py ├── launch.sh └── retrieval_server.py ├── recipe ├── dapo │ ├── README.md │ ├── config │ │ └── dapo_trainer.yaml │ ├── dapo_ray_trainer.py │ ├── main_dapo.py │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ └── test_dapo_7b.sh ├── drgrpo │ └── README.md ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ └── run_prime_qwen.sh ├── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ │ ├── __init__.py │ │ ├── gpqa.py │ │ ├── livecodebench.py │ │ └── math.py └── sppo │ ├── README.md │ ├── __init__.py │ ├── config │ └── sppo_trainer.yaml │ ├── dp_actor.py │ ├── main_sppo.py │ ├── run_qwen2.5-7b_rm.sh │ ├── sppo_ray_trainer.py │ └── sppo_worker.py ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── converter_hf_to_mcore.py ├── diagnose.py ├── format.sh ├── install_nginx.sh ├── install_vllm_sglang_mcore.sh ├── model_merger.py ├── nq_search.py ├── run_vllm_with_nginx.sh └── vllm_server.sh ├── setup.py ├── tests ├── __init__.py ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ ├── run_qwen_megatron_ckpt.sh │ └── test_fsdp_ckpt.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── distro │ └── requirements.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── generation │ │ └── run_gen_qwen05.sh │ ├── 
ppo_trainer │ │ ├── run_function_reward.sh │ │ └── run_model_reward.sh │ ├── run_dapo.sh │ ├── run_deepseek_grpo.sh │ ├── run_deepseek_grpo_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_deepseek_megatron_parallelism.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_ppo_trainer_megatron.sh │ ├── run_prime.sh │ ├── run_qwen2vl_geo3k_function_rm.sh │ ├── run_qwen_grpo.sh │ ├── run_qwen_grpo_megatron.sh │ ├── run_qwen_gsm8k_custom_function_rm.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_gsm8k_prime.sh │ ├── run_qwen_megatron.sh │ ├── run_qwen_megatron_parallelism.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ ├── run_ray_trainer_rmpad.sh │ ├── run_sppo.sh │ ├── run_test.sh │ └── sft │ │ ├── run_sft.sh │ │ └── test_sp_loss_match.py ├── generation │ └── run_gen_qwen05.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kernels │ └── test_linear_cross_entropy.py ├── kill_github_tests.sh ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── models │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── ray_cpu │ ├── check_worker_alive │ │ └── main.py │ ├── test_auto_padding.py │ ├── test_check_worker_alive.py │ ├── test_decorator.py │ ├── test_fused_workers.py │ └── test_ray_local_envs.py ├── ray_gpu │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_colocated_workers.py │ ├── test_colocated_workers_fused.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── reward_score │ └── test_sandbox_fusion.py ├── rl_factory │ ├── envs │ │ └── test_tool_use.py │ ├── generator │ │ ├── async_generator_test.py │ │ ├── async_results.csv │ │ └── test.sh │ ├── rewarder │ │ └── test_parallel.py │ └── test_qwen3_manager.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_qwen05_peft.sh │ ├── run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── single_controller │ └── base │ │ └── test_decorator.py ├── test_protocol.py ├── trainer │ ├── __init__.py │ └── ppo │ │ ├── __init__.py │ │ └── test_metric_utils.py ├── utility │ └── test_tensor_dict_utilities.py ├── utils │ ├── cpu_tests │ │ ├── test_fs.py │ │ ├── test_import_utils.py │ │ ├── test_model.py │ │ ├── test_module.py │ │ └── test_timeout_decorator.py │ └── gpu_tests │ │ ├── checkpoint │ │ └── test_fsdp_ckpt.py │ │ ├── dataset │ │ ├── 
test_multiturn_sft_dataset.py │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py │ │ ├── test_flops_counter.py │ │ ├── test_seqlen_balancing.py │ │ └── test_torch_functional.py ├── verl │ └── utils │ │ └── dataset │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py └── workers │ └── rollout │ ├── async_rollout_utils.py │ ├── run_fsdp_vllm.py │ ├── test_hf_rollout.py │ ├── test_sglang_async_rollout_w_tools.py │ ├── test_sglang_async_spmd.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ ├── test_vllm_multi_turn.py │ ├── test_vllm_spmd.py │ ├── test_vllm_tool_calling.py │ └── utils_sglang.py ├── verl ├── __init__.py ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── model_forward.py │ │ ├── model_initializer.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py ├── protocol.py ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py ├── third_party │ ├── __init__.py │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ └── vllm │ │ ├── __init__.py │ │ ├── vllm_v_0_3_1 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── tokenizer.py │ │ ├── weight_loaders.py │ │ └── worker.py │ │ ├── vllm_v_0_4_2 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ ├── vllm_v_0_5_4 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ └── vllm_v_0_6_3 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── 
config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── gsm8k_tool.py │ └── schemas.py ├── trainer │ ├── __init__.py │ ├── config │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ └── sft_trainer.yaml │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── runtime_env.yaml ├── utils │ ├── __init__.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── multiturn_sft_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── performance.py │ │ ├── profile.py │ │ └── trajectory_tracker.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── metric │ │ ├── __init__.py │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ ├── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── search.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── ulysses.py │ ├── vllm_request.py │ └── vllm_utils.py ├── version │ └── version └── workers │ ├── __init__.py │ ├── actor │ ├── __init__.py │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── batch.py │ ├── dapo.py │ ├── naive.py │ ├── parallel.py │ └── prime.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── async_server.py │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_rollout.py │ │ └── sglang_rollout.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── fire_vllm_rollout.py │ │ ├── vllm_async_server.py │ │ ├── vllm_rollout.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── 
fsdp_vllm_reward.py │ ├── megatron_sglang.py │ └── megatron_vllm.py ├── webui ├── README.md ├── app.py ├── components │ └── rewards │ │ └── graders │ │ ├── __init__.py │ │ ├── base.py │ │ ├── graders.py │ │ └── qwen_math.py ├── requirements.txt ├── run_webui.sh └── tabs │ ├── __init__.py │ ├── data_processing.py │ ├── project_management.py │ ├── reward_definition.py │ ├── tool_definition.py │ └── training_deployment.py └── workspace └── tools └── code_interpreter ├── kernel_connection_file_11d607b7-be32-4947-9087-88f808616b56_30594.json ├── kernel_connection_file_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.json ├── kernel_connection_file_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.json ├── kernel_connection_file_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.json ├── kernel_connection_file_67ad306a-e335-4294-b241-514085b015a3_6550.json ├── kernel_connection_file_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.json ├── kernel_connection_file_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.json ├── kernel_connection_file_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.json ├── launch_kernel_11d607b7-be32-4947-9087-88f808616b56_30594.py ├── launch_kernel_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.py ├── launch_kernel_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.py ├── launch_kernel_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.py ├── launch_kernel_67ad306a-e335-4294-b241-514085b015a3_6550.py ├── launch_kernel_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.py ├── launch_kernel_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.py └── launch_kernel_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.py /assets/rl-factory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/assets/rl-factory.png -------------------------------------------------------------------------------- /docker/Apptainerfile.rocm: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | 3 | # Support - Traing: fsdp; Inference: vllm 4 | # FROM: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 5 | # Support - Traing: fsdp; Inference: vllm, sglang 6 | FROM lmsysorg/sglang:v0.4.5-rocm630 7 | 8 | %environment 9 | export PYTORCH_ROCM_ARCH="gfx90a;gfx942" 10 | 11 | export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__" 12 | export CFLAGS="-D__HIP_PLATFORM_AMD__" 13 | export CXXFLAGS="-D__HIP_PLATFORM_AMD__" 14 | 15 | %post 16 | # Create source directory 17 | mkdir -p /opt/src 18 | 19 | # Uninstall and reinstall vllm 20 | pip uninstall -y vllm 21 | cd /opt/src 22 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git 23 | cd vllm 24 | MAX_JOBS=$(nproc) python3 setup.py install 25 | cd /opt 26 | rm -rf /opt/src/vllm 27 | 28 | # Install dependencies 29 | pip install "tensordict<0.6" --no-deps 30 | pip install accelerate \ 31 | codetiming \ 32 | datasets \ 33 | dill \ 34 | hydra-core \ 35 | liger-kernel \ 36 | numpy \ 37 | pandas \ 38 | peft \ 39 | "pyarrow>=15.0.0" \ 40 | pylatexenc \ 41 | "ray[data,train,tune,serve]" \ 42 | torchdata \ 43 | transformers \ 44 | wandb \ 45 | orjson \ 46 | pybind11 47 | 48 | # Clone and install verl from GitHub 49 | cd /opt 50 | git clone https://github.com/volcengine/verl.git 51 | cd verl 52 | # Uncomment to use a specific version 53 | # git checkout v0.3.0.post0 54 | pip install -e . 
--no-deps 55 | 56 | # Install torch_memory_saver 57 | pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps -------------------------------------------------------------------------------- /docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | # Support - Traing: fsdp; Inference: vllm 7 | # FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 8 | # Support - Traing: fsdp; Inference: vllm, sglang 9 | FROM lmsysorg/sglang:v0.4.6.post1-rocm630 10 | 11 | # Set working directory 12 | # WORKDIR $PWD/app 13 | 14 | # Set environment variables 15 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 16 | 17 | ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__" 18 | ENV CFLAGS="-D__HIP_PLATFORM_AMD__" 19 | ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__" 20 | 21 | # Install vllm 22 | RUN pip uninstall -y vllm && \ 23 | rm -rf vllm && \ 24 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 25 | cd vllm && \ 26 | MAX_JOBS=$(nproc) python3 setup.py install && \ 27 | cd .. && \ 28 | rm -rf vllm 29 | 30 | # Copy the entire project directory 31 | COPY . . 32 | 33 | # Install dependencies 34 | RUN pip install "tensordict<0.6" --no-deps && \ 35 | pip install accelerate \ 36 | codetiming \ 37 | datasets \ 38 | dill \ 39 | hydra-core \ 40 | liger-kernel \ 41 | numpy \ 42 | pandas \ 43 | peft \ 44 | "pyarrow>=15.0.0" \ 45 | pylatexenc \ 46 | "ray[data,train,tune,serve]>=2.45.0" \ 47 | torchdata \ 48 | transformers \ 49 | wandb \ 50 | orjson \ 51 | pybind11 && \ 52 | pip install -e . --no-deps 53 | 54 | # Install torch_memory_saver 55 | RUN pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE . 
2 | 3 | # the one in docker.io is an alias for the one veturbo 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN pip3 config unset global.index-url 10 | 11 | # transformers 4.47.0 contains the following bug: 12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask' 13 | RUN pip3 install --no-cache-dir \ 14 | torch==2.4.0 \ 15 | accelerate \ 16 | codetiming \ 17 | dill \ 18 | hydra-core \ 19 | numpy \ 20 | pybind11 \ 21 | tensordict \ 22 | "transformers <= 4.46.0" 23 | 24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation 25 | 26 | # vllm depends on ray 27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10 28 | 29 | # install apex 30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 31 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 32 | git+https://github.com/NVIDIA/apex 33 | 34 | # install Transformer Engine 35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req 36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM 37 | # - cudnn is required by TransformerEngine 38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \ 39 | # pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation 41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and navigate to http://localhost:8000 to view the documentation. 
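If you are editing the docs repeatedly, a live-rebuild loop can replace the manual `make html` + `http.server` cycle. This is a minimal sketch assuming the optional `sphinx-autobuild` package (not part of `requirements-docs.txt`) is installed:

```bash
# Rebuild the HTML and refresh the browser automatically whenever a source file changes.
pip install sphinx-autobuild
sphinx-autobuild . _build/html --port 8000
```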
-------------------------------------------------------------------------------- /docs/README_vllm0.8.md: -------------------------------------------------------------------------------- 1 | # Upgrading to vLLM >= 0.8 2 | 3 | ## Installation 4 | 5 | Note: This version of verl+vLLM 0.8+ supports **FSDP** for training and **vLLM** for rollout. 6 | 7 | ```bash 8 | # Create the conda environment 9 | conda create -n verl python==3.10 10 | conda activate verl 11 | 12 | # Install verl 13 | git clone https://github.com/volcengine/verl.git 14 | cd verl 15 | pip3 install -e . 16 | 17 | # Install the latest stable version of vLLM 18 | pip3 install vllm==0.8.3 19 | 20 | # Install flash-attn 21 | pip3 install flash-attn --no-build-isolation 22 | 23 | ``` 24 | 25 | We have a pre-built docker image for verl+vLLM 0.8.3. You can pull it directly with the following command: 26 | 27 | ```bash 28 | docker pull hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0 29 | ``` 30 | 31 | ## Features 32 | 33 | vLLM 0.8+ supports CUDA graphs and the V1 engine by default in verl. To enable these features, remember to add the following lines to the bash script: 34 | 35 | ```bash 36 | actor_rollout_ref.rollout.enforce_eager=False \ 37 | actor_rollout_ref.rollout.free_cache_engine=False \ 38 | ``` 39 | 40 | and also **remove** the environment variable if it exists: 41 | 42 | ```bash 43 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs: 44 | # export VLLM_ATTENTION_BACKEND=XFORMERS 45 | ``` 46 | 47 | ## Notes 48 | 49 | Upgrading directly to vllm>=0.8 may change the versions of some dependency packages. If you encounter the following problem: 50 | 51 | ```bash 52 | in from torch.multiprocessing.reductions import ForkingPickler ImportError: cannot import name 'ForkingPickler' from 'torch.multiprocessing.reductions' (/opt/conda/lib/python3.11/site-packages/torch/multiprocessing/reductions.py) 53 | ``` 54 | 55 | You need to upgrade `tensordict` to version 0.6.2 using the command `pip install tensordict==0.6.2`.
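The example run scripts in this repository forward extra arguments to the trainer via `$@`, so the vLLM 0.8 settings above can usually be appended as Hydra overrides without editing the script itself. A minimal sketch (the script path is just one example from this repo):

```bash
# Drop the legacy attention-backend override if it was exported for vllm<=0.6.3.
unset VLLM_ATTENTION_BACKEND

# Extra key=value overrides are passed through to the trainer via "$@".
bash examples/grpo_trainer/run_qwen2-7b.sh \
    actor_rollout_ref.rollout.enforce_eager=False \
    actor_rollout_ref.rollout.free_cache_engine=False
```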
56 | -------------------------------------------------------------------------------- /docs/_static/js/runllm-widget.js: -------------------------------------------------------------------------------- 1 | document.addEventListener("DOMContentLoaded", function () { 2 | var script = document.createElement("script"); 3 | script.type = "module"; 4 | script.id = "runllm-widget-script"; 5 | script.src = "https://widget.runllm.com"; 6 | script.setAttribute("version", "stable"); 7 | script.setAttribute("crossorigin", "true"); 8 | script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); 9 | script.setAttribute("runllm-name", "verl Chatbot"); 10 | script.setAttribute("runllm-position", "TOP_RIGHT"); 11 | script.setAttribute("runllm-assistant-id", "679"); 12 | script.async = true; 13 | document.head.appendChild(script); 14 | }); -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /docs/advance/rope.rst: -------------------------------------------------------------------------------- 1 | RoPE Scaling override 2 | ======================================= 3 | 4 | Some models such as `Qwen/Qwen2.5-7B-Instruct `_ support RoPE Scaling but don't have it defined in their config.json file. 5 | For example, this model supports this configuration: 6 | 7 | .. code:: python 8 | 9 | { 10 | ..., 11 | "rope_scaling": { 12 | "factor": 4.0, 13 | "original_max_position_embeddings": 32768, 14 | "type": "yarn" 15 | } 16 | } 17 | 18 | 19 | 20 | In order to support a longer context for such models, you must override the model configs when starting the trainer. 21 | 22 | PPO example: 23 | 24 | .. code:: bash 25 | 26 | +actor_rollout_ref.model.override_config.rope_scaling.type=yarn \ 27 | +actor_rollout_ref.model.override_config.rope_scaling.factor=4.0 \ 28 | +actor_rollout_ref.model.override_config.rope_scaling.original_max_position_embeddings=32768 \ 29 | 30 | 31 | And for the critic model 32 | 33 | .. code:: bash 34 | 35 | +critic.model.override_config.rope_scaling.type=yarn \ 36 | +critic.model.override_config.rope_scaling.factor=4.0 \ 37 | +critic.model.override_config.rope_scaling.original_max_position_embeddings=32768 \ 38 | -------------------------------------------------------------------------------- /docs/api/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainers 2 | ========================= 3 | 4 | Trainers drive the training loop. Introducing new trainer classes in case of new training paradiam is encouraged. 5 | 6 | .. 
autosummary:: 7 | :nosignatures: 8 | 9 | verl.trainer.ppo.ray_trainer.RayPPOTrainer 10 | 11 | 12 | Core APIs 13 | ~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: verl.trainer.ppo.ray_trainer.RayPPOTrainer 16 | 17 | .. automodule:: verl.utils.tokenizer 18 | :members: hf_tokenizer 19 | 20 | .. automodule:: verl.single_controller 21 | :members: Worker, WorkerGroup, ClassWithInitArgs, ResourcePool 22 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | Training utils 2 | ========================= 3 | 4 | Core APIs 5 | ~~~~~~~~~~~~~~~~~ 6 | 7 | .. automodule:: verl.utils.metric 8 | :members: reduce_metrics 9 | -------------------------------------------------------------------------------- /docs/examples/multi_modal_example.rst: -------------------------------------------------------------------------------- 1 | Multi-Modal Example Architecture 2 | ================================= 3 | 4 | Introduction 5 | ------------ 6 | 7 | verl now supports multi-modal training. You can use FSDP and 8 | vLLM/SGLang to start a multi-modal RL task. Megatron support is also 9 | on the way. 10 | 11 | Follow the steps below to quickly start a multi-modal RL task. 12 | 13 | Step 1: Prepare dataset 14 | ----------------------- 15 | 16 | .. code:: bash 17 | 18 | # it will be saved in the $HOME/data/geo3k folder 19 | python examples/data_preprocess/geo3k.py 20 | 21 | Step 2: Download Model 22 | ---------------------- 23 | 24 | .. code:: bash 25 | 26 | # download the model from huggingface 27 | python3 -c "import transformers; transformers.pipeline(model='Qwen/Qwen2.5-VL-7B-Instruct')" 28 | 29 | Step 3: Perform GRPO training with multi-modal model on Geo3K Dataset 30 | --------------------------------------------------------------------- 31 | 32 | .. code:: bash 33 | 34 | # run the task 35 | bash examples/grpo_trainer/run_qwen2_5_vl-7b.sh 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /docs/sglang_multiturn/multiturn.rst: -------------------------------------------------------------------------------- 1 | Multi-turn Rollout Support 2 | ========================== 3 | 4 | Basic Configuration 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | To enable multi-turn rollout, make sure to configure the following fields in your rollout configuration: 8 | 9 | .. code-block:: yaml 10 | 11 | actor_rollout_ref: 12 | rollout: 13 | multi_turn: True 14 | name: "sglang_async" 15 | 16 | This configuration activates the sglang_async engine for multi-turn interaction during rollout. 17 | 18 | Custom Tool Configuration 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | For custom environment interaction tools, you can implement your own tools based on ``verl.tools.base_tool.BaseTool``. Then, specify your tool configurations in a YAML file: 22 |
23 | .. code-block:: yaml 24 | 25 | tools: 26 | - class_name: "" 27 | config: {} 28 | tool_schema: 29 | 30 | You may refer to GSM8KTool_example_configuration_, which is an example of such a tool configuration. Its implementation can be found in gsm8k_tool.py_. 31 | 32 | Finally, set the ``tools_config_file`` in your rollout config: 33 | 34 | .. code-block:: yaml 35 | 36 | actor_rollout_ref: 37 | rollout: 38 | tool_kwargs: 39 | tools_config_file: 40 | 41 | This allows integration of customized tool behaviors during actor rollout steps. 42 | 43 | GSM8K Multi-turn Training Performance 44 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | See the training performance of multi-turn rollout on the GSM8K task HERE_. 47 | 48 | .. _HERE: https://wandb.ai/zhaochenyang20/gsm8k_async_rl/runs/1ro1r7om?nw=nwuserzhaochenyang20 49 | 50 | .. _GSM8KTool_example_configuration: https://github.com/volcengine/verl/blob/main/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml 51 | 52 | .. _gsm8k_tool.py: https://github.com/volcengine/verl/blob/main/verl/tools/gsm8k_tool.py 53 | -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Env as BaseEnv 2 | from .search import SearchEnv 3 | from .reward_rollout_example import RewardRolloutEnv 4 | 5 | __all__ = ['BaseEnv', 'SearchEnv', 'RewardRolloutEnv'] 6 | 7 | TOOL_ENV_REGISTRY = { 8 | 'base': BaseEnv, 9 | 'search': SearchEnv, 10 | 'reward_rollout': RewardRolloutEnv 11 | } -------------------------------------------------------------------------------- /envs/configs/mcp_tools.pydata: -------------------------------------------------------------------------------- 1 | [ 2 | {'mcpServers': { 3 | 'search': { 4 | 'command': 'python3', 5 | 'args': ['envs/tools/search.py'] 6 | } 7 | }} 8 | ] -------------------------------------------------------------------------------- /envs/configs/sse_mcp_tools.pydata: -------------------------------------------------------------------------------- 1 | [ 2 | {'mcpServers': { 3 | 'meituan_search': { 4 | 'url': 'http://xxxx:8080/sse', 5 | } 6 | }} 7 | ] -------------------------------------------------------------------------------- /envs/tool_manager/__init__.py: -------------------------------------------------------------------------------- 1 | from .config_manager import ConfigManager 2 | from .qwen3_manager import QwenManager 3 | from .qwen2_5_manager import Qwen25Manager 4 | 5 | __all__ = ['ConfigManager', 'QwenManager', 'Qwen25Manager'] 6 | 7 | TOOL_MANAGER_REGISTRY = { 8 | 'config': ConfigManager, 9 | 'qwen3': QwenManager, 10 | 'qwen2_5': Qwen25Manager 11 | } -------------------------------------------------------------------------------- /envs/tool_manager/base_manager.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class ToolManager(ABC): 6 | def __init__(self, verl_config) -> None: 7 | self.verl_config = verl_config 8 | self.tool_map = {} 9 | self._build_tools() 10 | 11 | def get_tool(self, name_or_short_name: str): 12 | """Get a tool by its name or short name. 13 | 14 | Args: 15 | name_or_short_name: the tool's name or short name 16 | 17 | Returns: 18 | The matching tool, or None if it is not found 19 | """ 20 | name_or_short_name = str(name_or_short_name) 21 | return self.tool_map.get(name_or_short_name, None) 22 | 23 | @property 24 | @abstractmethod 25 | def all_tools(self): 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def
_build_tools(self): 30 | raise NotImplementedError 31 | 32 | @abstractmethod 33 | def execute_actions(self, responses: List[str]): 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /envs/utils/util.py: -------------------------------------------------------------------------------- 1 | # copy from qwen_agent 2 | import json 3 | import json5 4 | from typing import Optional 5 | 6 | 7 | class ToolServiceError(Exception): 8 | def __init__(self, 9 | exception: Optional[Exception] = None, 10 | code: Optional[str] = None, 11 | message: Optional[str] = None, 12 | extra: Optional[dict] = None): 13 | if exception is not None: 14 | super().__init__(exception) 15 | else: 16 | super().__init__(f'\nError code: {code}. Error message: {message}') 17 | self.exception = exception 18 | self.code = code 19 | self.message = message 20 | self.extra = extra 21 | 22 | 23 | class DocParserError(Exception): 24 | def __init__(self, 25 | exception: Optional[Exception] = None, 26 | code: Optional[str] = None, 27 | message: Optional[str] = None, 28 | extra: Optional[dict] = None): 29 | if exception is not None: 30 | super().__init__(exception) 31 | else: 32 | super().__init__(f'\nError code: {code}. Error message: {message}') 33 | self.exception = exception 34 | self.code = code 35 | self.message = message 36 | self.extra = extra 37 | 38 | 39 | def json_loads(text: str) -> dict: 40 | text = text.strip('\n') 41 | if text.startswith('```') and text.endswith('\n```'): 42 | text = '\n'.join(text.split('\n')[1:-1]) 43 | try: 44 | return json.loads(text) 45 | except json.decoder.JSONDecodeError as json_err: 46 | try: 47 | return json5.loads(text) 48 | except ValueError: 49 | raise json_err -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=2 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path\ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=16 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path \ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=2 \ 22 | 
rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=1024 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.actor.entropy_coeff=0 \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=20 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo --config-path=config \ 4 | --config-name='ppo_megatron_trainer.yaml'\ 5 | algorithm.adv_estimator=grpo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=1024 \ 11 | data.filter_overlong_prompts=True \ 12 | data.truncation='error' \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \ 19 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \ 20 | actor_rollout_ref.actor.use_kl_loss=True \ 21 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 22 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 23 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | 
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm_megatron' \ 35 | trainer.n_gpus_per_node=16 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 17 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | algorithm.use_kl_in_reward=False \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=20 \ 38 | trainer.test_freq=5 \ 39 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 28 | critic.model.enable_gradient_checkpointing=True \ 29 | critic.ppo_micro_batch_size_per_gpu=32 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=20 \ 40 | trainer.test_freq=1 \ 41 | trainer.total_epochs=15 $@ 42 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=512 \ 8 | data.max_prompt_length=1024 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=google/gemma-2-2b-it \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=False \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.actor.use_kl_loss=False \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=False \ 26 | critic.model.path=google/gemma-2-2b-it \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.use_kl_in_reward=False \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_example' \ 35 | trainer.experiment_name='gemma2b_function_rm' \ 36 | trainer.n_gpus_per_node=2 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=20 \ 39 | trainer.test_freq=10 \ 40 | trainer.total_epochs=15 $@ 41 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 
17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | 
data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 
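# Note on the positional arguments above: $1 (nproc_per_node) is forwarded to torchrun below,
# and $2 (save_path) becomes trainer.default_local_dir, the local directory where checkpoints are written.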
11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 | data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 | trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Turn Rollout Example (GSM8K) 2 | 3 | This example demonstrates how to perform **multi-turn rollout** using SGLang with a tool-calling capable model (e.g., Qwen2.5-3B) on the GSM8K dataset. 4 | 5 | ## Usage 6 | 7 | ### Step 1: Download GSM8K Dataset 8 | 9 | ```bash 10 | cd examples/data_preprocess 11 | python3 gsm8k_multiturn_w_tool.py 12 | ``` 13 | 14 | This will download and preprocess the GSM8K dataset into ~/data/gsm8k/. 15 | 16 | ### Step 2: Run Multi-Turn Rollout 17 | 18 | If you have 8 GPUs 19 | Use the standard 8-GPU script: 20 | 21 | ```bash 22 | cd your_verl_root_dir 23 | bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh 24 | ``` 25 | 26 | If you have only 4 GPUs 27 | Use the fallback 4-GPU script: 28 | 29 | ```bash 30 | cd your_verl_root_dir 31 | bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh 32 | ``` 33 | 34 | ## Notes 35 | 36 | - The rollout supports multi-turn conversations with tool-calling capabilities. 37 | - Current tools are used for GSM8K answer evaluation. 38 | - Future versions may extend to search and code interpreter tools. 39 | -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | searchpath: 3 | - file://verl/trainer/config 4 | 5 | defaults: 6 | - ppo_trainer 7 | - _self_ 8 | 9 | data: 10 | max_prompt_length: 1024 11 | max_response_length: 1024 12 | train_batch_size: 256 13 | return_raw_chat: True 14 | 15 | actor_rollout_ref: 16 | hybrid_engine: True 17 | rollout: 18 | name: sglang_async 19 | multi_turn: 20 | enable: True 21 | max_turns: 5 22 | # tool_config_path: "./config/tool_config/gsm8k_tool_config.yaml" 23 | -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml: -------------------------------------------------------------------------------- 1 | tools: 2 | - class_name: "verl.tools.gsm8k_tool.Gsm8kTool" 3 | config: {} 4 | tool_schema: 5 | type: "function" 6 | function: 7 | name: "calc_gsm8k_reward" 8 | description: "A tool for calculating the reward of gsm8k. 
(1.0 if parsed answer is correct, 0.0 if parsed answer is incorrect or not correctly parsed)" 9 | parameters: 10 | type: "object" 11 | properties: 12 | answer: 13 | type: "string" 14 | description: "The model's answer to the GSM8K math problem, must be a string of digits" 15 | required: ["answer"] 16 | -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 main_ppo_split.py \ 4 | algorithm.adv_estimator=gae \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | data.filter_overlong_prompts=True \ 11 | data.truncation='error' \ 12 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.actor.use_kl_loss=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size_per_gpu=8 \ 27 | critic.model.fsdp_config.param_offload=False \ 28 | critic.model.fsdp_config.optimizer_offload=False \ 29 | algorithm.use_kl_in_reward=False \ 30 | trainer.critic_warmup=0 \ 31 | trainer.logger=['console','wandb'] \ 32 | trainer.project_name='verl_example_gsm8k' \ 33 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 34 | trainer.n_gpus_per_node=8 \ 35 | trainer.nnodes=1 \ 36 | trainer.save_freq=-1 \ 37 | trainer.total_epochs=15 $@ 38 | -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/rlhf/gsm8k/train.parquet 4 | gsm8k_val_path=$HOME/data/rlhf/math/test.parquet 5 | model_path=Qwen/Qwen2-72B-Instruct 6 | 7 | python3 -m verl.trainer.main_ppo \ 8 | algorithm.adv_estimator=grpo \ 9 | data.train_files=$gsm8k_train_path \ 10 | data.val_files=$gsm8k_val_path \ 11 | data.train_batch_size=1024 \ 12 | data.max_prompt_length=512 \ 13 | data.max_response_length=512 \ 14 | data.filter_overlong_prompts=True \ 15 | data.truncation='error' \ 16 | actor_rollout_ref.model.path=$model_path \ 17 | actor_rollout_ref.actor.optim.lr=1e-6 \ 18 | actor_rollout_ref.model.use_remove_padding=True \ 19 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 20 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 21 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 22 | actor_rollout_ref.actor.use_kl_loss=True \ 23 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 24 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 25 | actor_rollout_ref.actor.entropy_coeff=0 \ 26 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 27 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 28 | 
actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 29 | actor_rollout_ref.rollout.tensor_model_parallel_size=16 \ 30 | actor_rollout_ref.rollout.name=vllm \ 31 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 32 | actor_rollout_ref.rollout.n=5 \ 33 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 34 | algorithm.use_kl_in_reward=False \ 35 | trainer.critic_warmup=0 \ 36 | trainer.logger=['console','wandb'] \ 37 | trainer.project_name='verl_grpo_example_gsm8k' \ 38 | trainer.experiment_name='Qwen2_72B_Instruct' \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=4 \ 41 | trainer.save_freq=-1 \ 42 | trainer.test_freq=5 \ 43 | trainer.total_epochs=1 $@ -------------------------------------------------------------------------------- /generator/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_generator import BaseGenerator, _GENERATORS 2 | from .api_generator import APIGenerator 3 | 4 | 5 | def get_generator(name: str) -> BaseGenerator: 6 | """ 7 | Return constructor for specified generator 8 | """ 9 | name = "".join(name.lower().split("_")) 10 | if name in _GENERATORS: 11 | return _GENERATORS[name] 12 | else: 13 | raise Exception("Error: Trying to access a generator that has not been registered") 14 | -------------------------------------------------------------------------------- /generator/base_generator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Dict, Any, Tuple, Optional 3 | from abc import abstractmethod 4 | from easydict import EasyDict 5 | 6 | 7 | # specifies a dictionary of engines 8 | _GENERATORS: Dict[str, Any] = {} # registry 9 | 10 | 11 | def register_generator(name): 12 | """Decorator used to register a generator 13 | Args: 14 | name: Name of the engine type to register 15 | """ 16 | 17 | def register_class(cls, name): 18 | _GENERATORS[name] = cls 19 | setattr(sys.modules[__name__], name, cls) 20 | return cls 21 | 22 | if isinstance(name, str): 23 | name = name.lower() 24 | return lambda c: register_class(c, name) 25 | 26 | cls = name 27 | name = cls.__name__ 28 | register_class(cls, name.lower()) 29 | 30 | return cls 31 | 32 | 33 | @register_generator 34 | class BaseGenerator: 35 | def __init__(self, config: EasyDict): 36 | self.config = config 37 | 38 | @abstractmethod 39 | def generate(self, *args, **kwargs): 40 | pass 41 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | pip3 install accelerate bitsandbytes datasets deepspeed==0.16.4 einops flash-attn==2.7.0.post2 isort jsonlines loralib optimum packaging peft pynvml>=12.0.0 ray[default]==2.42.0 tensorboard torch torchmetrics tqdm transformers==4.48.3 transformers_stream_generator wandb wheel 2 | pip3 install vllm==0.8.5 3 | pip3 install "qwen-agent[code_interpreter]" 4 | pip3 install llama_index bs4 pymilvus infinity_client codetiming tensordict==0.6 omegaconf torchdata==0.10.0 hydra-core easydict dill python-multipart 5 | pip3 install -e . 
--no-deps 6 | pip3 install faiss-gpu-cu12 -------------------------------------------------------------------------------- /rag_server/README.md: -------------------------------------------------------------------------------- 1 | ## Environment configuration 2 | ```bash 3 | conda create -n searchr1 python=3.10 4 | conda activate searchr1 5 | pip3 install torch==2.6.0 torchaudio==2.6.0 torchvision==0.21.0 6 | pip3 install vllm==0.8.5 7 | 8 | # flash attention 2 9 | pip3 install flash-attn --no-build-isolation 10 | pip3 install faiss-gpu-cu12 uvicorn fastapi mcp # faiss-gpu==1.8 also works here 11 | ``` 12 | ## Quick start 13 | 14 | (1) Download the dataset 15 | ```bash 16 | save_path=/your/path/to/save 17 | python rag_server/download.py --save_path $save_path 18 | cat $save_path/part_* > $save_path/e5_Flat.index 19 | gzip -d $save_path/wiki-18.jsonl.gz 20 | ``` 21 | 22 | (2) Process the NQ dataset. 23 | ```bash 24 | python scripts/nq_search.py 25 | ``` 26 | -------------------------------------------------------------------------------- /rag_server/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from huggingface_hub import hf_hub_download 3 | 4 | parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.") 5 | parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID") 6 | parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files") 7 | 8 | args = parser.parse_args() 9 | 10 | repo_id = "PeterJinGo/wiki-18-e5-index" 11 | for file in ["part_aa", "part_ab"]: 12 | hf_hub_download( 13 | repo_id=repo_id, 14 | filename=file, # e.g., "e5_Flat.index" 15 | repo_type="dataset", 16 | local_dir=args.save_path, 17 | ) 18 | 19 | repo_id = "PeterJinGo/wiki-18-corpus" 20 | hf_hub_download( 21 | repo_id=repo_id, 22 | filename="wiki-18.jsonl.gz", 23 | repo_type="dataset", 24 | local_dir=args.save_path, 25 | ) -------------------------------------------------------------------------------- /rag_server/launch.sh: -------------------------------------------------------------------------------- 1 | #tmux new -s rag_server 2 | #conda activate searchr1 3 | #bash retrieval_launch.sh 4 | #tmux detach 5 | #bash train_ppo.sh 6 | 7 | nvcc --version 8 | 9 | file_path=/your/path/to/PeterGriffinJin/Search-R1/data/rag_data 10 | index_file=$file_path/e5_Flat.index 11 | corpus_file=$file_path/wiki-18.jsonl 12 | retriever=/your/path/to/PeterGriffinJin/Search-R1/huggingface.co/intfloat/e5-base-v2 13 | 14 | python rag_server/retrieval_server.py --index_path $index_file \ 15 | --corpus_path $corpus_file \ 16 | --topk 3 \ 17 | --retriever_model $retriever & 18 | sleep 1200 19 | 20 | # Send a follow-up retrieval request 21 | curl -X POST http://127.0.0.1:5003/retrieve \ 22 | -H "Content-Type: application/json" \ 23 | -d '{ 24 | "queries": ["What is Python?", "Tell me about neural networks."], 25 | "topk": 3, 26 | "return_scores": true 27 | }' -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | searchpath: 3 | - file://verl/trainer/config 4 | 5 | defaults: 6 | - ppo_trainer 7 | - _self_ 8 | 9 | data: 10 | gen_batch_size: ${data.train_batch_size} 11 | 12 | reward_model: 13 | reward_manager: dapo 14 | overlong_buffer: 15 | enable: False # We try to avoid forgetting to set enable 16 | len: 0 17 | penalty_factor: 0.0 18 | log: 
False 19 | 20 | algorithm: 21 | filter_groups: 22 | enable: False # We try to avoid forgetting to set enable 23 | metric: null # acc / score / seq_reward / seq_final_reward / ... 24 | max_num_gen_batches: 0 # Non-positive values mean no upper limit 25 | 26 | trainer: 27 | project_name: verl-dapo 28 | -------------------------------------------------------------------------------- /recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -uxo pipefail 3 | 4 | export VERL_HOME=${VERL_HOME:-"${HOME}/verl"} 5 | export TRAIN_FILE=${TRAIN_FILE:-"${VERL_HOME}/data/dapo-math-17k.parquet"} 6 | export TEST_FILE=${TEST_FILE:-"${VERL_HOME}/data/aime-2024.parquet"} 7 | export OVERWRITE=${OVERWRITE:-0} 8 | 9 | mkdir -p "${VERL_HOME}/data" 10 | 11 | if [ ! -f "${TRAIN_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then 12 | wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k/resolve/main/data/dapo-math-17k.parquet?download=true" 13 | fi 14 | 15 | if [ ! -f "${TEST_FILE}" ] || [ "${OVERWRITE}" -eq 1 ]; then 16 | wget -O "${TEST_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/AIME-2024/resolve/main/data/aime-2024.parquet?download=true" 17 | fi 18 | -------------------------------------------------------------------------------- /recipe/drgrpo/README.md: -------------------------------------------------------------------------------- 1 | # Dr. GRPO Open-Source Implementation 2 | 3 | 4 | https://github.com/sail-sg/understand-r1-zero 5 | 6 | 7 | This paper suggests a way to calculate the unbiased policy gradient. 8 | 9 | 10 | ## Configuration 11 | ```yaml 12 | actor_rollout_ref: 13 | actor: 14 | loss_agg_mode: "seq-mean-token-sum-norm" # turn off seq-dim averaging 15 | use_kl_loss: False 16 | algorithm: 17 | norm_adv_by_std_in_grpo: False # turn off standard deviation norm 18 | ``` 19 | 20 | , with all other parameters set same as GRPO. 21 | -------------------------------------------------------------------------------- /recipe/prime/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek R1 Reproduction 2 | 3 | This recipe is under development, if you are interested, checkout the TODO list and join this project! 
https://github.com/volcengine/verl/issues/708 4 | 5 | ## Reproducing Evaluation 6 | 7 | Eval Results of DS-R1-Distill-Qwen2.5-1.5B (k=8) 8 | 9 | Dataset | Test Results | Reported 10 | -- | -- | -- 11 | GPQA Diamond | 35.3 | 33.8 12 | LiveCodeBench | 16.9 | 16.9 13 | AIME 2024 | 30.4 | 28.9 14 | CNMO 2024 (en) | 45.1 | - 15 | CNMO 2024 (zh) | 41.0 | - 16 | 17 | --- 18 | 19 | Eval Results (DS-R1) 20 | 21 | Dataset | Test Results (k=1) | Test Results (k=4) | Reported 22 | -- | -- | -- | -- 23 | GPQA Diamond | 67.7 | 69.6 | 71.5 24 | LiveCodeBench | 64.7 | 63.1 | 65.9 25 | AIME 2024 | 86.7 | 79.2 | 79.8 26 | CNMO 2024 | 75.0 | 78.5 | 78.8 27 | -------------------------------------------------------------------------------- /recipe/r1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_init: 13 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. -------------------------------------------------------------------------------- /recipe/r1/reward_score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
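# reward_func (below) dispatches each sample to a task-specific scorer based on its
# data_source tag: the AIME/CNMO math sets go to recipe.r1.tasks.math, GPQA to
# recipe.r1.tasks.gpqa, and LiveCodeBench to recipe.r1.tasks.livecodebench; any other
# tag raises NotImplementedError. A minimal, illustrative call (the answer strings are made up):
#   score = reward_func("Idavidrein/gpqa", "... Answer: C", "C")  # -> 1.0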
14 | 15 | 16 | def reward_func(data_source, solution_str, ground_truth, extra_info=None): 17 | if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]: 18 | from recipe.r1.tasks import math 19 | 20 | return math.compute_score(solution_str, ground_truth) 21 | elif data_source == "Idavidrein/gpqa": 22 | from recipe.r1.tasks import gpqa 23 | 24 | return gpqa.compute_score(solution_str, ground_truth) 25 | elif data_source in ["livecodebench/code_generation_lite", "livecodebench/code_generation"]: 26 | from recipe.r1.tasks import livecodebench 27 | 28 | return livecodebench.compute_score(solution_str, ground_truth) 29 | else: 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=Qwen/DeepSeek-R1-Distill-Qwen-1.5B 2 | DATA_PATH=/workspace/datasets/r1_bench 3 | 4 | # Eval Data Process 5 | python3 -m recipe.r1.data_process \ 6 | --local_dir $DATA_PATH \ 7 | --tasks all 8 | 9 | # Generation 10 | python3 -m verl.trainer.main_generation \ 11 | trainer.nnodes=1 \ 12 | trainer.n_gpus_per_node=8 \ 13 | data.path=$DATA_PATH/test.parquet \ 14 | data.prompt_key=prompt \ 15 | data.batch_size=1024 \ 16 | data.n_samples=8 \ 17 | data.output_path=$DATA_PATH/test-output-8.parquet \ 18 | model.path=$MODEL_PATH \ 19 | rollout.temperature=0.6 \ 20 | rollout.top_p=0.95 \ 21 | rollout.prompt_length=1024 \ 22 | rollout.response_length=32768 \ 23 | rollout.tensor_model_parallel_size=1 \ 24 | rollout.gpu_memory_utilization=0.9 \ 25 | rollout.max_num_batched_tokens=65536 26 | 27 | # Evaluation 28 | python3 -m recipe.r1.main_eval \ 29 | data.path=$DATA_PATH/test-output-8.parquet \ 30 | data.prompt_key=prompt \ 31 | data.response_key=responses \ 32 | custom_reward_function.path=recipe/r1/reward_score.py \ 33 | custom_reward_function.name=reward_func 34 | -------------------------------------------------------------------------------- /recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | # Extraction Template from https://github.com/openai/simple-evals/blob/90e3e821cabba2aeb6be651dcb662b253df04225/common.py#L25 18 | ANSWER_PATTERN_MULTICHOICE = r"(?i)Answer[ \t]*:[ \t]*\$?([A-D])\$?" 19 | 20 | 21 | def compute_score(solution_str, ground_truth) -> float: 22 | match = re.search(ANSWER_PATTERN_MULTICHOICE, solution_str) 23 | extracted_answer = match.group(1) if match else None 24 | score = 1.0 if extracted_answer == ground_truth else 0.0 25 | return score 26 | -------------------------------------------------------------------------------- /recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import contextlib 15 | 16 | try: 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str) -> bool: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | with contextlib.suppress(Exception): 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | 35 | return ret_score 36 | -------------------------------------------------------------------------------- /recipe/sppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023-2024 SGLang Team 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- 1 | # the sppo config will override default ppo_trainer.yaml 2 | 3 | hydra: 4 | searchpath: 5 | - file://verl/trainer/config 6 | 7 | defaults: 8 | - ppo_trainer 9 | - _self_ 10 | 11 | actor_rollout_ref: 12 | actor: 13 | sppo_eta: 1.0 14 | optim: 15 | lr_warmup_steps: 15 16 | rollout: 17 | name: sglang 18 | tensor_model_parallel_size: 2 19 | gpu_memory_utilization: 0.5 20 | val_kwargs: 21 | n: 2 # 2 will trigger validation, 1 will bypass 22 | 23 | algorithm: 24 | adv_estimator: null 25 | sppo_eta: 1.0 26 | 27 | trainer: 28 | log_val_generations: 0 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | peft 12 | pyarrow>=19.0.0 13 | pybind11 14 | pylatexenc 15 | pre-commit 16 | ray[default] 17 | tensordict<=0.6.2 18 | torchdata 19 | transformers 20 | # vllm==0.8.4 21 | wandb 22 | packaging>=20.0 23 | uvicorn 24 | fastapi 25 | qwen-agent 26 | mcp 27 | -------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy 9 | pandas 10 | peft 11 | pyarrow>=19.0.0 12 | pybind11 13 | pylatexenc 14 | ray[default]>=2.10 15 | tensordict<=0.6.2 16 | torchdata 17 | torchvision 18 | transformers 19 | wandb 20 | sglang[all]==0.4.4.post4 21 | torch-memory-saver>=0.0.5 22 | mcp 23 | qwen_agent 24 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples recipe 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
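# Script overview: enumerate every prompt of the DigitCompletion toy task, split them
# 80/20 into train/test, wrap each prompt as a single user chat turn, and write
# train.parquet / test.parquet next to this file for the arithmetic-sequence e2e test.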
14 | 15 | import os 16 | 17 | from torch.utils import data 18 | 19 | from tests.e2e.envs.digit_completion import DigitCompletion 20 | 21 | if __name__ == "__main__": 22 | simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9) 23 | all_prompts = simple_task.get_all_prompts() 24 | 25 | # 21 * 6 * 4 26 | train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2]) 27 | train_data = list(train_data) 28 | test_data = list(test_data) 29 | 30 | train_data = [[{"role": "user", "content": str(item)}] for item in train_data] 31 | test_data = [[{"role": "user", "content": str(item)}] for item in test_data] 32 | 33 | print(f"Size of train: {len(train_data)}, size of test: {len(test_data)}") 34 | 35 | train_data = {"prompt": train_data} 36 | test_data = {"prompt": test_data} 37 | 38 | model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__))) 39 | 40 | import pandas as pd 41 | 42 | train_data_frame = pd.DataFrame(train_data) 43 | test_data_frame = pd.DataFrame(test_data) 44 | 45 | train_data_frame.to_parquet(os.path.join(model_folder, "train.parquet")) 46 | test_data_frame.to_parquet(os.path.join(model_folder, "test.parquet")) 47 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/d0615d11b1f2e201f5a42403b1fc4cb01eb2db95/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate 
user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as below: 4 | 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be modulo with max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is defined in tests/e2e/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | An example of running the task is provided in `tests/e2e/run_ray_trainer.sh`. 26 | 27 | ```bash 28 | bash tests/e2e/run_ray_trainer.sh 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | 18 | def check_congratulations_in_file(output_file): 19 | with open(output_file) as f: 20 | output = f.read() 21 | 22 | success_message = "Congratulations!!! You have called my_reward_function successfully!!!" 23 | assert success_message in output, f"Success message of my_reward_function not found in {output_file}" 24 | print("Check passes") 25 | 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--output_file", required=True, type=str) 30 | 31 | args = parser.parse_args() 32 | 33 | check_congratulations_in_file(args.output_file) 34 | -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ["DigitCompletion"] 18 | -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from transformers import AutoTokenizer, LlamaConfig 16 | 17 | from .task import DigitCompletion, generate_ground_truth_response 18 | from .tokenizer import CharTokenizer 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ["DigitCompletion", "generate_ground_truth_response", "CharTokenizer"] 23 | -------------------------------------------------------------------------------- /tests/e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Tested with 1 & 4 GPUs 3 | set -xeuo pipefail 4 | 5 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} 6 | 7 | NGPUS_PER_NODE=${NGPUS_PER_NODE:-4} 8 | OUTPUT_PATH=${OUTPUT_PATH:-$HOME/data/gen/qwen_05_gen_test.parquet} 9 | GEN_TP=${GEN_TP:-2} # Default tensor parallel size to 2 10 | 11 | python3 -m verl.trainer.main_generation \ 12 | trainer.nnodes=1 \ 13 | trainer.n_gpus_per_node="${NGPUS_PER_NODE}" \ 14 | data.path="${HOME}/data/gsm8k/test.parquet" \ 15 | data.prompt_key=prompt \ 16 | data.n_samples=1 \ 17 | data.output_path="${OUTPUT_PATH}" \ 18 | model.path="${MODEL_ID}" \ 19 | +model.trust_remote_code=True \ 20 | rollout.temperature=1.0 \ 21 | rollout.top_k=50 \ 22 | rollout.top_p=0.7 \ 23 | rollout.prompt_length=2048 \ 24 | rollout.response_length=1024 \ 25 | rollout.tensor_model_parallel_size="${GEN_TP}" \ 26 | rollout.gpu_memory_utilization=0.8 27 | -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct 6 | 7 | python3 -m verl.trainer.main_ppo --config-path=config \ 8 | --config-name='ppo_megatron_trainer.yaml'\ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.train_batch_size=1024 \ 12 | data.max_prompt_length=512 \ 13 | data.max_response_length=512 \ 14 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 15 | actor_rollout_ref.actor.optim.lr=2e-6 \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 19 | 
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \ 25 | critic.optim.lr=2e-5 \ 26 | critic.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.megatron.tensor_model_parallel_size=2 \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console'] \ 33 | trainer.project_name='verl_megatron_gsm8k_examples' \ 34 | trainer.experiment_name='deepseek_llm_1b3_function_rm' \ 35 | trainer.n_gpus_per_node=8 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=1 \ 39 | trainer.total_epochs=15 \ 40 | trainer.total_training_steps=3 $@ 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=1024 \ 12 | data.filter_overlong_prompts=True \ 13 | data.truncation='error' \ 14 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 15 | actor_rollout_ref.actor.optim.lr=1e-6 \ 16 | actor_rollout_ref.model.use_remove_padding=True \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.actor.use_kl_loss=True \ 20 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 21 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=True \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 27 | actor_rollout_ref.rollout.name=vllm \ 28 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 29 | actor_rollout_ref.rollout.n=5 \ 30 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 31 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_grpo_example_gsm8k' \ 36 | trainer.experiment_name='qwen2_7b_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 | trainer.total_epochs=15 \ 42 | trainer.total_training_steps=2 $@ -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | ENGINE=${1:-vllm} 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | 
data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=$ENGINE \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=True \ 26 | critic.model.path=Qwen/Qwen2.5-0.5B \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_example_gsm8k' \ 35 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=1 \ 39 | trainer.default_local_dir=$HOME/$ENGINE/ckpt/ \ 40 | trainer.total_training_steps=1 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=grpo \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=False \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=False \ 26 | critic.model.path=Qwen/Qwen2.5-0.5B \ 27 | critic.model.enable_gradient_checkpointing=False \ 28 | critic.ppo_micro_batch_size_per_gpu=4 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | +trainer.val_before_train=False \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.total_training_steps=1 $@ 41 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=remax \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: 
verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B 6 | 7 | export VLLM_ATTENTION_BACKEND=XFORMERS 8 | 9 | python3 -m verl.trainer.main_ppo --config-path=config \ 10 | --config-name='ppo_megatron_trainer.yaml'\ 11 | data.train_files=$HOME/data/gsm8k/train.parquet \ 12 | data.val_files=$HOME/data/gsm8k/test.parquet \ 13 | data.train_batch_size=1024 \ 14 | data.max_prompt_length=512 \ 15 | data.max_response_length=512 \ 16 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 17 | actor_rollout_ref.actor.optim.lr=2e-6 \ 18 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 19 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 20 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 22 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 23 | actor_rollout_ref.rollout.name=vllm \ 24 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 25 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 26 | actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \ 27 | critic.optim.lr=2e-5 \ 28 | critic.model.path=Qwen/Qwen2.5-0.5B \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size_per_gpu=4 \ 31 | critic.megatron.tensor_model_parallel_size=2 \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_megatron_gsm8k_examples' \ 36 | trainer.experiment_name='qwen2_5_0b5_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=1 \ 41 | trainer.total_epochs=15 \ 42 | trainer.total_training_steps=3 $@ 43 | -------------------------------------------------------------------------------- /tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 5 | --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$HOME/data/r1/test.parquet \ 11 | data.prompt_key=prompt \ 12 | data.batch_size=1024 \ 13 | data.n_samples=1 \ 14 | data.output_path=$HOME/data/r1/test-output-k1.parquet \ 15 | model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 16 | rollout.temperature=0.6 \ 17 | rollout.top_p=0.95 \ 18 | rollout.prompt_length=1024 \ 19 | rollout.response_length=32768 \ 20 | rollout.tensor_model_parallel_size=1 \ 21 | rollout.gpu_memory_utilization=0.95 \ 22 | rollout.max_num_batched_tokens=65536 \ 23 | rollout.enforce_eager=False \ 24 | rollout.free_cache_engine=False 25 | 26 | python3 -m recipe.r1.main_eval \ 27 | data.path=$HOME/data/r1/test-output-k1.parquet \ 28 | data.prompt_key=prompt \ 29 | data.response_key=responses \ 30 | custom_reward_function.path=recipe/r1/reward_score.py \ 31 | custom_reward_function.name=reward_func -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | 
algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.max_prompt_length=16 \ 16 | data.max_response_length=32 \ 17 | data.return_raw_input_ids=True \ 18 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 19 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 20 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=128 \ 21 | actor_rollout_ref.actor.entropy_coeff=0 \ 22 | actor_rollout_ref.actor.optim.lr=1e-4 \ 23 | actor_rollout_ref.actor.use_kl_loss=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 25 | actor_rollout_ref.rollout.name=hf \ 26 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 27 | critic.ppo_micro_batch_size_per_gpu=128 \ 28 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 29 | critic.optim.lr=1e-3 \ 30 | algorithm.use_kl_in_reward=False \ 31 | trainer.total_epochs=200 \ 32 | trainer.experiment_name=arithmetic_sequences \ 33 | trainer.logger=['console'] \ 34 | trainer.n_gpus_per_node=1 \ 35 | trainer.test_freq=1 \ 36 | trainer.save_freq=110 | tee $OUTPUT_FILE; 37 | 38 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE 39 | rm -rf $OUTPUT_FILE 40 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_fire_sampling.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | algorithm.adv_estimator=gae \ 12 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 13 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 14 | data.train_batch_size=800 \ 15 | data.val_batch_size=200 \ 16 | data.max_prompt_length=16 \ 17 | data.max_response_length=32 \ 18 | data.return_raw_input_ids=True \ 19 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 20 | actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=128 \ 22 | actor_rollout_ref.actor.entropy_coeff=0 \ 23 | actor_rollout_ref.actor.optim.lr=1e-4 \ 24 | actor_rollout_ref.actor.use_kl_loss=False \ 25 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \ 26 | actor_rollout_ref.rollout.name=hf \ 27 | actor_rollout_ref.rollout.use_fire_sampling=True \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 29 | critic.ppo_micro_batch_size_per_gpu=128 \ 30 | critic.model.path=tests/e2e/arithmetic_sequence/model \ 31 | critic.optim.lr=1e-3 \ 32 | algorithm.use_kl_in_reward=False \ 33 | trainer.total_epochs=200 \ 34 | trainer.experiment_name=arithmetic_sequences \ 35 | trainer.logger=['console'] \ 36 | trainer.n_gpus_per_node=1 \ 37 | trainer.test_freq=1 \ 38 | trainer.save_freq=110 | tee $OUTPUT_FILE; 39 | 40 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE --target 0.19 41 | rm -rf $OUTPUT_FILE 42 | -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 
6 | 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 8 | algorithm.adv_estimator=gae \ 9 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 10 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 11 | actor_rollout_ref.actor.use_kl_loss=False \ 12 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 13 | actor_rollout_ref.rollout.name=vllm \ 14 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 15 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 16 | critic.model.path=Qwen/Qwen2.5-0.5B \ 17 | critic.model.use_remove_padding=True \ 18 | algorithm.use_kl_in_reward=False \ 19 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /tests/e2e/run_sppo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | # in e2e_sppo.yml, we set NUM_GPUS=8 L20 5 | 6 | NUM_GPUS=${NUM_GPUS:-8} 7 | 8 | gsm8k_train_path=./data/math/train.parquet 9 | gsm8k_test_path=./data/math/test.parquet 10 | train_files="['$gsm8k_train_path']" 11 | test_files="['$gsm8k_test_path']" 12 | 13 | exp_name="Qwen2.5-0.5B-Instruct-sppo-minimal" 14 | 15 | python3 -m recipe.sppo.main_sppo \ 16 | data.train_files="$train_files" \ 17 | data.val_files="$test_files" \ 18 | data.train_batch_size=1024 \ 19 | data.max_prompt_length=1024 \ 20 | data.max_response_length=512 \ 21 | data.filter_overlong_prompts=True \ 22 | data.truncation='error' \ 23 | data.return_raw_chat=True \ 24 | actor_rollout_ref.model.path="./models/Qwen2.5-0.5B-Instruct" \ 25 | actor_rollout_ref.actor.optim.lr=1e-6 \ 26 | actor_rollout_ref.model.use_remove_padding=True \ 27 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ 28 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 29 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 30 | actor_rollout_ref.actor.use_kl_loss=False \ 31 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 32 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 33 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 34 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 35 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 36 | actor_rollout_ref.rollout.name=sglang \ 37 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 38 | algorithm.use_kl_in_reward=False \ 39 | trainer.critic_warmup=0 \ 40 | trainer.logger=['console'] \ 41 | trainer.val_before_train=True \ 42 | trainer.n_gpus_per_node=8 \ 43 | trainer.nnodes=1 \ 44 | trainer.save_freq=-1 \ 45 | trainer.total_epochs=2 $@ -------------------------------------------------------------------------------- /tests/e2e/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xeuo pipefail 3 | 4 | # Get the configuration name and engine name from arguments 5 | CONFIG_NAME="$1" 6 | ENGINE="${2:-vllm}" 7 | 8 | # Download model if needed 9 | huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir "$HOME/models/Qwen/Qwen2.5-0.5B" 10 | 11 | # Run the training with the specified configuration 12 | python3 -m verl.trainer.main_ppo \ 13 | --config-name "$CONFIG_NAME" "$@" -------------------------------------------------------------------------------- /tests/e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | ENTRYPOINT=${ENTRYPOINT:-"-m 
verl.trainer.fsdp_sft_trainer"} 5 | 6 | NUM_GPUS=${NUM_GPUS:-8} 7 | 8 | MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} 9 | MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}} 10 | huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}" 11 | 12 | TRAIN_FILES=${TRAIN_FILES:-$HOME/data/gsm8k/train.parquet} 13 | VAL_FILES=${VAL_FILES:-$HOME/data/gsm8k/test.parquet} 14 | 15 | SP_SIZE=${SP_SIZE:-1} 16 | LIGER=${LIGER:-False} 17 | MULTITURN=${MULTITURN:-False} 18 | LORA_RANK=${LORA_RANK:-0} 19 | RM_PAD=${RM_PAD:-True} 20 | 21 | micro_bsz=2 22 | NUM_GPUS=8 23 | 24 | project_name="verl-test" 25 | exp_name="$(basename "${MODEL_ID,,}")-sft-minimal" 26 | ckpts_home=${ckpts_home:-$HOME/${project_name}/${exp_name}} 27 | 28 | mkdir -p "${ckpts_home}" 29 | 30 | torchrun --standalone --nnodes=1 --nproc_per_node=${NUM_GPUS} ${ENTRYPOINT} \ 31 | data.train_files="${TRAIN_FILES}" \ 32 | data.val_files="${VAL_FILES}" \ 33 | data.prompt_key=extra_info \ 34 | data.response_key=extra_info \ 35 | data.prompt_dict_keys=['question'] \ 36 | data.response_dict_keys=['answer'] \ 37 | data.multiturn.enable="${MULTITURN}" \ 38 | data.multiturn.messages_key=messages \ 39 | optim.lr=1e-4 \ 40 | data.micro_batch_size_per_gpu=${micro_bsz} \ 41 | model.partial_pretrain="${MODEL_PATH}" \ 42 | model.lora_rank="${LORA_RANK}" \ 43 | model.lora_alpha=16 \ 44 | model.target_modules=all-linear \ 45 | model.use_liger="${LIGER}" \ 46 | ulysses_sequence_parallel_size="${SP_SIZE}" \ 47 | use_remove_padding="${RM_PAD}" \ 48 | trainer.default_local_dir="${ckpts_home}" \ 49 | trainer.project_name="${project_name}" \ 50 | trainer.experiment_name="${exp_name}" \ 51 | trainer.total_training_steps=1 \ 52 | trainer.logger=['console'] \ 53 | trainer.default_hdfs_dir=null $@ 54 | 55 | rm -rf "${ckpts_home:?}/*" -------------------------------------------------------------------------------- /tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | infer_tp=${3:-2} # Default tensor parallel size to 2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | python3 -m verl.trainer.main_generation \ 17 | trainer.nnodes=1 \ 18 | trainer.n_gpus_per_node=$nproc_per_node \ 19 | data.path=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=prompt \ 21 | data.n_samples=1 \ 22 | data.output_path=$save_path \ 23 | model.path=Qwen/Qwen2.5-0.5B-Instruct \ 24 | +model.trust_remote_code=True \ 25 | rollout.temperature=1.0 \ 26 | rollout.top_k=50 \ 27 | rollout.top_p=0.7 \ 28 | rollout.prompt_length=2048 \ 29 | rollout.response_length=1024 \ 30 | rollout.tensor_model_parallel_size=$infer_tp \ 31 | rollout.gpu_memory_utilization=0.8 32 | -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_flash_attn_cross_entropy(): 17 | import torch 18 | from flash_attn.ops.triton.cross_entropy import cross_entropy_loss 19 | from torch import nn 20 | 21 | from verl.utils.debug import log_gpu_memory_usage 22 | from verl.utils.torch_functional import logprobs_from_logits_naive 23 | 24 | log_gpu_memory_usage("At start") 25 | 26 | hidden_states = torch.randn(size=(2048, 5120), device="cuda", requires_grad=True, dtype=torch.bfloat16) 27 | 28 | linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device="cuda", dtype=torch.bfloat16) 29 | 30 | logits = linear(hidden_states) 31 | 32 | # logits = logits.float() 33 | labels = torch.randint(low=0, high=155136, size=(2048,), device="cuda") 34 | 35 | log_gpu_memory_usage("before computation") 36 | # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True) 37 | output = -cross_entropy_loss(logits, labels)[0] 38 | # output = logprobs_from_logits(logits, labels) 39 | log_gpu_memory_usage("After forward") 40 | output.sum().backward() 41 | log_gpu_memory_usage("After backward") 42 | 43 | groundtruth = logprobs_from_logits_naive(logits.float(), labels) 44 | 45 | torch.testing.assert_close(output, groundtruth) 46 | -------------------------------------------------------------------------------- /tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs." 
42 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import time 16 | import os 17 | import subprocess 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time") 38 | time.sleep(wait_time * 1.5) 39 | print(time.time(), f"start checking") 40 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 41 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 42 | print(f"test passed") 43 | 44 | 45 | if __name__ == "__main__": 46 | test() 47 | -------------------------------------------------------------------------------- /tests/ray/test_rvdz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class TestWorker: 20 | 21 | def __init__(self, rank, world_size, group_name): 22 | self.rank = rank 23 | self.world_size = world_size 24 | self.group_name = group_name 25 | self.communicator = None 26 | 27 | def init(self): 28 | from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray 29 | self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name) 30 | 31 | def test(self): 32 | if self.communicator is None: 33 | return None 34 | return self.communicator.rank_id() 35 | 36 | 37 | def test_rvdz(): 38 | ray.init() 39 | 40 | group_name = "test_group" 41 | world_size = 2 42 | 43 | workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)] 44 | 45 | ray.get([worker.init.remote() for worker in workers]) 46 | 47 | ranks = ray.get([worker.test.remote() for worker in workers]) 48 | 49 | assert ranks == [0, 1], f"expecting [0, 1], got {ranks}" 50 | 51 | ray.shutdown() 52 | -------------------------------------------------------------------------------- /tests/ray_cpu/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import subprocess 17 | import time 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time * 1.5} to let signal returned to process but still not exceed process wait time", 38 | ) 39 | time.sleep(wait_time * 1.5) 40 | print(time.time(), "start checking") 41 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 42 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 43 | print("test passed") 44 | 45 | 46 | if __name__ == "__main__": 47 | test() 48 | -------------------------------------------------------------------------------- /tests/ray_cpu/test_ray_local_envs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | e2e test verl.single_controller.ray 16 | """ 17 | 18 | import os 19 | 20 | import ray 21 | 22 | from verl.single_controller.base.worker import Worker 23 | from verl.single_controller.ray.base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup 24 | 25 | 26 | @ray.remote 27 | class TestActor(Worker): 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | def getenv(self, key): 32 | val = os.getenv(key, f"{key} not set") 33 | return val 34 | 35 | 36 | def test_basics(): 37 | ray.init(num_cpus=100) 38 | 39 | # create 4 workers, each hold a GPU 40 | resource_pool = RayResourcePool([4], use_gpu=False) 41 | class_with_args = RayClassWithInitArgs(cls=TestActor) 42 | 43 | worker_group = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=class_with_args, name_prefix="worker_group_basic") 44 | 45 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE") 46 | assert output == ["4", "4", "4", "4"] 47 | 48 | output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK") 49 | assert set(output) == set(["0", "1", "2", "3"]) 50 | 51 | ray.shutdown() 52 | 53 | 54 | if __name__ == "__main__": 55 | test_basics() 56 | -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray_gpu/test_rvdz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class TestWorker: 20 | def __init__(self, rank, world_size, group_name): 21 | self.rank = rank 22 | self.world_size = world_size 23 | self.group_name = group_name 24 | self.communicator = None 25 | 26 | def init(self): 27 | from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray 28 | 29 | self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name) 30 | 31 | def test(self): 32 | if self.communicator is None: 33 | return None 34 | return self.communicator.rank_id() 35 | 36 | 37 | def test_rvdz(): 38 | ray.init() 39 | 40 | group_name = "test_group" 41 | world_size = 2 42 | 43 | workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)] 44 | 45 | ray.get([worker.init.remote() for worker in workers]) 46 | 47 | ranks = ray.get([worker.test.remote() for worker in workers]) 48 | 49 | assert ranks == [0, 1], f"expecting [0, 1], got {ranks}" 50 | 51 | ray.shutdown() 52 | -------------------------------------------------------------------------------- /tests/rl_factory/envs/test_tool_use.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | from envs.base import Env 3 | 4 | 5 | def test(): 6 | config = type('Config', (), { 7 | 'config_path': 'envs/configs/calculator.json', 8 | 'step_token': '\n' 9 | }) 10 | env = Env(config) 11 | tokenizer = AutoTokenizer.from_pretrained('/your/path/to/Qwen/Qwen2.5-7B-Instruct') 12 | 13 | response_action = """ 14 | Hello! 15 | 16 | 17 | 18 | {"expression": "1+1"} 19 | 20 | 21 | 22 | calculator 23 | 24 | {"expressions": "1+2"} 25 | 26 | 27 | 28 | """ 29 | response_answer = f""" 30 | Hello! 31 | 32 | 2 33 | 34 | """ 35 | env.step([response_action, response_answer], tokenizer) 36 | 37 | if __name__ == '__main__': 38 | test() 39 | -------------------------------------------------------------------------------- /tests/rl_factory/rewarder/test_parallel.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 2 | from multiprocessing import Pool 3 | from generator import get_generator 4 | from verl.utils.vllm_request import vllm_generate 5 | 6 | 7 | def test_parallel_rewarder(): 8 | questions = [ 9 | "Python中如何实现多线程编程?", 10 | "解释一下Python的GIL(全局解释器锁)及其影响", 11 | "Python中的装饰器是什么?请举例说明", 12 | "如何用Python处理JSON数据?", 13 | "Python中列表(list)和元组(tuple)有什么区别?", 14 | "解释Python的生成器(generator)和它们的优势", 15 | "Python中如何处理异常?try-except块如何使用?", 16 | "Python的虚拟环境(virtualenv)有什么作用?如何创建和使用?" 
17 | ] 18 | 19 | generator = get_generator('api')( 20 | config=EasyDict({ 21 | 'api_method': 'local', 22 | 'port': 9000 23 | })) 24 | 25 | print('Start') 26 | with Pool(processes=8) as pool: 27 | results = [] 28 | for question in questions: 29 | result = pool.apply_async( 30 | vllm_generate, args=('http://0.0.0.0:8080', question, '/your/path/to/Qwen/QwQ-32B') 31 | ) 32 | results.append(result) 33 | 34 | datasets_processed = [result.get() for result in results] 35 | 36 | 37 | if __name__ == '__main__': 38 | test_parallel_rewarder() 39 | -------------------------------------------------------------------------------- /tests/rl_factory/test_qwen3_manager.py: -------------------------------------------------------------------------------- 1 | from envs.tool_manager.qwen3_manager import QwenManager 2 | 3 | 4 | def test_manager(): 5 | env_config = { 6 | 'name': 'base', 7 | 'tool_manager': 'qwen3', 8 | 'mcp_mode': 'sse', 9 | 'config_path': 'envs/configs/sse_mcp_tools.pydata', 10 | 'enable_thinking': True, 11 | 'max_prompt_length': 2048, 12 | } 13 | manager = QwenManager(env_config) 14 | print('Tools:') 15 | for tool_name, tool in manager.all_tools.items(): 16 | print(' - tool name: {}'.format(tool_name)) 17 | 18 | for func in manager.tool_map.values(): 19 | print(func.function) 20 | 21 | 22 | if __name__ == '__main__': 23 | test_manager() 24 | -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def test_import(): 17 | import verl 18 | 19 | print(verl.__version__) 20 | 21 | 22 | def test_single_controller_import(): 23 | import verl.single_controller 24 | 25 | print(verl.single_controller.__version__) 26 | -------------------------------------------------------------------------------- /tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | 
trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /tests/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Tests for the trainer module. 16 | """ -------------------------------------------------------------------------------- /tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Tests for the PPO trainer module. 16 | """ 17 | -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # Test module for import_utils.load_extern_type testing 17 | class TestClass: 18 | """A test class to be imported by load_extern_type""" 19 | 20 | def __init__(self, value=None): 21 | self.value = value or "default" 22 | 23 | def get_value(self): 24 | return self.value 25 | 26 | 27 | TEST_CONSTANT = "test_constant_value" 28 | 29 | 30 | def test_function(): 31 | return "test_function_result" 32 | -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from verl.utils import hf_tokenizer 17 | from verl.utils.dataset.rm_dataset import RMDataset 18 | 19 | 20 | def get_rm_data(): 21 | # prepare test dataset 22 | local_folder = os.path.expanduser("~/verl-data/full_hh_rlhf/rm/") 23 | local_path = os.path.join(local_folder, "test.parquet") 24 | os.makedirs(local_folder, exist_ok=True) 25 | return local_path 26 | 27 | 28 | def test_rm_dataset(): 29 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 30 | local_path = get_rm_data() 31 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 32 | data = dataset[0]["input_ids"] 33 | output = tokenizer.batch_decode(data) 34 | assert len(output) > 1 35 | assert isinstance(output[0], str) 36 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | 16 | from transformers import AutoTokenizer 17 | from verl.utils import hf_tokenizer 18 | from verl.utils.dataset.rm_dataset import RMDataset 19 | 20 | 21 | def get_rm_data(): 22 | # prepare test dataset 23 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet" 24 | local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/') 25 | local_path = os.path.join(local_folder, 'test.parquet') 26 | os.makedirs(local_folder, exist_ok=True) 27 | return local_path 28 | 29 | 30 | def test_rm_dataset(): 31 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 32 | local_path = get_rm_data() 33 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 34 | data = dataset[0]['input_ids'] 35 | output = tokenizer.batch_decode(data) 36 | assert len(output) > 1 37 | assert isinstance(output[0], str) 38 | -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | from .protocol import DataProto 19 | from .utils.logging_utils import set_basic_config 20 | 21 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 22 | 23 | with open(os.path.join(version_folder, "version/version")) as f: 24 | __version__ = f.read().strip() 25 | 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | 30 | __all__ = ["DataProto", "__version__"] 31 | 32 | if os.getenv("VERL_USE_MODELSCOPE", "False").lower() == "true": 33 | import importlib 34 | 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope -U`") 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | 40 | patch_hub() 41 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common model zoos such as huggingface/transformers struggle with PyTorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized model implementation with packed inputs in verl. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask; a minimal sketch of such a packed-input forward follows below.
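The sketch below illustrates what such a packed-input attention forward can look like. It is a minimal, hypothetical example rather than verl's actual implementation: the `PackedSelfAttention` module and its parameter names are invented for illustration, and it assumes the `flash-attn` package (its `flash_attn_varlen_func` varlen kernel) is installed and that inputs are fp16/bf16 CUDA tensors.

```python
# Hypothetical sketch of a padding-free (packed-input) attention forward.
# Not verl's actual model code; module and parameter names are illustrative only.
import torch
import torch.nn as nn
from flash_attn import flash_attn_varlen_func  # assumes flash-attn >= 2.x is installed


class PackedSelfAttention(nn.Module):
    """Causal self-attention over packed sequences (no padding tokens)."""

    def __init__(self, hidden_size: int, num_heads: int):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        self.qkv_proj = nn.Linear(hidden_size, 3 * hidden_size, bias=False)
        self.o_proj = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, max_seqlen_in_batch: int) -> torch.Tensor:
        # hidden_states: (total_nnz, hidden_size) -- every sequence in the batch
        # concatenated back to back with padding removed.
        # cu_seqlens: int32 cumulative sequence lengths marking sequence boundaries.
        total_nnz = hidden_states.shape[0]
        q, k, v = self.qkv_proj(hidden_states).chunk(3, dim=-1)
        q = q.view(total_nnz, self.num_heads, self.head_dim)
        k = k.view(total_nnz, self.num_heads, self.head_dim)
        v = v.view(total_nnz, self.num_heads, self.head_dim)
        # The varlen kernel applies causal attention to each packed sequence
        # independently, so tokens never attend across sequence boundaries.
        attn_out = flash_attn_varlen_func(
            q, k, v,
            cu_seqlens_q=cu_seqlens,
            cu_seqlens_k=cu_seqlens,
            max_seqlen_q=max_seqlen_in_batch,
            max_seqlen_k=max_seqlen_in_batch,
            causal=True,
        )
        return self.o_proj(attn_out.reshape(total_nnz, -1))
```

Because the kernel receives `cu_seqlens` directly, no attention-mask tensor is materialized and no compute is spent on padding, which is the point of the packed-input rewrite described in this step.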
15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Following the infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | ParallelLlamaForCausalLM, 17 | # rmpad with megatron 18 | ParallelLlamaForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelLlamaForCausalLMRmPadPP, 21 | ParallelLlamaForValueRmPad, 22 | ParallelLlamaForValueRmPadPP, 23 | # original model with megatron 24 | ParallelLlamaModel, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelLlamaForCausalLM", 29 | "ParallelLlamaForCausalLMRmPad", 30 | "ParallelLlamaForCausalLMRmPadPP", 31 | "ParallelLlamaForValueRmPad", 32 | "ParallelLlamaForValueRmPadPP", 33 | "ParallelLlamaModel", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_linear import ( 18 | LinearForLastLayer, 19 | MergedColumnParallelLinear, 20 | QKVParallelLinear, 21 | ) 22 | from .parallel_mlp import ParallelLlamaMLP 23 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 24 | 25 | __all__ = ["LinearForLastLayer", "MergedColumnParallelLinear", "QKVParallelLinear", "ParallelLlamaAttention", "ParallelLlamaDecoderLayer", "ParallelLlamaDecoderLayerRmPad", "ParallelLlamaMLP", "ParallelLlamaRMSNorm"] 26 | -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .registry import get_mcore_forward_fn, get_mcore_weight_converter, hf_to_mcore_config, init_mcore_model 17 | 18 | __all__ = ["hf_to_mcore_config", "init_mcore_model", "get_mcore_forward_fn", "get_mcore_weight_converter"] 19 | -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | ParallelQwen2ForCausalLM, 17 | # rmpad with megatron 18 | ParallelQwen2ForCausalLMRmPad, 19 | # rmpad with megatron and pipeline parallelism 20 | ParallelQwen2ForCausalLMRmPadPP, 21 | ParallelQwen2ForValueRmPad, 22 | ParallelQwen2ForValueRmPadPP, 23 | # original model with megatron 24 | ParallelQwen2Model, 25 | ) 26 | 27 | __all__ = [ 28 | "ParallelQwen2ForCausalLM", 29 | "ParallelQwen2ForCausalLMRmPad", 30 | "ParallelQwen2ForCausalLMRmPadPP", 31 | "ParallelQwen2ForValueRmPad", 32 | "ParallelQwen2ForValueRmPadPP", 33 | "ParallelQwen2Model", 34 | ] 35 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | 20 | __all__ = ["ParallelQwen2Attention", "ParallelQwen2DecoderLayer", "ParallelQwen2DecoderLayerRmPad", "ParallelQwen2MLP", "ParallelQwen2RMSNorm"] 21 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from . 
import base 17 | from .base import * 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | # Note(haibin.lin): single_controller.__version__ is deprecated 22 | with open(os.path.join(os.path.join(version_folder, os.pardir), "version/version")) as f: 23 | __version__ = f.read().strip() 24 | 25 | 26 | __all__ = base.__all__ 27 | -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | __all__ = ["Worker", "WorkerGroup", "ClassWithInitArgs", "ResourcePool"] 19 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | import ray 18 | 19 | 20 | @ray.remote 21 | class WorkerGroupRegisterCenter: 22 | def __init__(self, rank_zero_info): 23 | self.rank_zero_info = rank_zero_info 24 | # rank -> node_id 25 | self.workers_info: Dict[int, str] = {} 26 | 27 | def get_rank_zero_info(self): 28 | return self.rank_zero_info 29 | 30 | def set_worker_info(self, rank, node_id) -> None: 31 | self.workers_info[rank] = node_id 32 | 33 | def get_worker_info(self) -> Dict[int, str]: 34 | return self.workers_info 35 | 36 | 37 | def create_worker_group_register_center(name, info): 38 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 39 | -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup, create_colocated_worker_cls, create_colocated_worker_cls_fused 16 | 17 | __all__ = ["RayClassWithInitArgs", "RayResourcePool", "RayWorkerGroup", "create_colocated_worker_cls", "create_colocated_worker_cls_fused"] 18 | -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
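The `WorkerGroupRegisterCenter` actor in `register_center/ray.py` above is a small named rendezvous point: the driver publishes rank-zero info once, and each worker registers its rank-to-node-id mapping so the group can be inspected later. Below is a minimal usage sketch; the actor name, the payload dictionary, and the single hard-coded worker registration are illustrative assumptions, not code taken from this repository.

```python
import ray

from verl.single_controller.base.register_center.ray import create_worker_group_register_center

ray.init()

# Driver side: publish rank-zero info under a well-known actor name
# (the name and the payload shape here are assumed values).
center = create_worker_group_register_center(
    name="demo_register_center",
    info={"MASTER_ADDR": "127.0.0.1", "MASTER_PORT": "29500"},
)

# Worker side: any process in the same Ray cluster can resolve the actor by name
# and report which node it landed on.
worker_center = ray.get_actor("demo_register_center")
ray.get(worker_center.set_worker_info.remote(1, ray.get_runtime_context().get_node_id()))

print(ray.get(center.get_rank_zero_info.remote()))  # {'MASTER_ADDR': '127.0.0.1', 'MASTER_PORT': '29500'}
print(ray.get(center.get_worker_info.remote()))  # {1: '<node id of the caller>'}
```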
14 | -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 27 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights: 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights: 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import Optional 17 | 18 | from transformers import PreTrainedTokenizer 19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup 20 | from vllm.utils import LRUCache 21 | 22 | 23 | class TokenizerGroup(TokenizerGroup): 24 | """A group of tokenizers that can be used for LoRA adapters.""" 25 | 26 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, max_input_length: Optional[int]): 27 | self.enable_lora = enable_lora 28 | self.max_input_length = max_input_length 29 | self.tokenizer = tokenizer 30 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 31 | 32 | # FIXME(sgm): for simplicity, we assign the special token here 33 | @property 34 | def pad_token_id(self): 35 | return self.tokenizer.pad_token_id 36 | 37 | @property 38 | def eos_token_id(self): 39 | return self.tokenizer.eos_token_id 40 | -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Copyright 2025 ModelBest Inc. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | 12 | ray_init: 13 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 
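The `custom_reward_function` block in `evaluation.yaml` above lets the evaluation entry point load a scoring function from a user-supplied file instead of the built-in scorers. A minimal sketch of such a file is shown below; the module name `my_reward.py`, the exact `(data_source, solution_str, ground_truth, extra_info)` argument list, and the substring-match rule are assumptions for illustration, so the real call contract should be checked against the script that consumes this config. Setting `path` to the location of this file and keeping `name: compute_score` would then route scoring through it.

```python
# my_reward.py, a hypothetical module that custom_reward_function.path could point to,
# with custom_reward_function.name left at its default value "compute_score".


def compute_score(data_source, solution_str, ground_truth, extra_info=None) -> float:
    """Toy rule-based reward: full credit if the ground truth appears verbatim in the response."""
    if ground_truth is None:
        return 0.0
    return 1.0 if str(ground_truth).strip() in solution_str else 0.0
```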
-------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | mode: sync # sync: LLM, async: AsyncLLM 18 | temperature: 1.0 19 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 20 | top_p: 0.7 21 | prompt_length: 1536 22 | response_length: 512 23 | # for vllm rollout 24 | dtype: bfloat16 # should align with FSDP 25 | gpu_memory_utilization: 0.5 26 | ignore_eos: False 27 | enforce_eager: True 28 | free_cache_engine: True 29 | load_format: dummy_dtensor 30 | tensor_model_parallel_size: 1 31 | max_num_batched_tokens: 8192 32 | max_model_len: null 33 | max_num_seqs: 1024 34 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 35 | log_prob_micro_batch_size_per_gpu: 8 36 | # for fire vllm rollout 37 | use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236 38 | # for hf rollout 39 | do_sample: True 40 | disable_log_stats: True 41 | enable_chunked_prefill: True 42 | n: 1 43 | actor: 44 | strategy: fsdp # This is for backward-compatibility 45 | ulysses_sequence_parallel_size: 1 # sp size 46 | fsdp_config: 47 | fsdp_size: -1 48 | 49 | ray_init: 50 | num_cpus: null # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then. 51 | -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | # Single-turn settings 8 | prompt_key: question 9 | response_key: answer 10 | prompt_dict_keys: ['question'] 11 | response_dict_keys: ['answer'] 12 | # Multi-turn settings 13 | multiturn: 14 | enable: false # Set to true to use multi-turn dataset 15 | messages_key: messages # Key for messages list in multi-turn mode 16 | max_length: 1024 17 | truncation: error 18 | balance_dp_token: False 19 | chat_template: null 20 | custom_cls: 21 | path: null 22 | name: null 23 | model: 24 | partial_pretrain: ~/models/gemma-1.1-7b-it 25 | fsdp_config: 26 | wrap_policy: 27 | min_num_params: 0 28 | cpu_offload: False 29 | offload_params: False 30 | external_lib: null 31 | enable_gradient_checkpointing: False 32 | trust_remote_code: False 33 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 34 | lora_alpha: 16 # LoRA scaling factor 35 | target_modules: all-linear # Target modules for LoRA adaptation 36 | use_liger: False 37 | optim: 38 | lr: 1e-5 39 | betas: [0.9, 0.95] 40 | weight_decay: 0.01 41 | warmup_steps_ratio: 0.1 42 | clip_grad: 1.0 43 | lr_scheduler: cosine 44 | ulysses_sequence_parallel_size: 1 45 | use_remove_padding: False 46 | trainer: 47 | default_local_dir: /tmp/sft_model 48 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 49 | resume_path: null 50 | project_name: 
gsm8k-sft 51 | experiment_name: test 52 | total_epochs: 4 53 | total_training_steps: null 54 | logger: ['console'] 55 | seed: 1 56 | 57 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs: 6 | # VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_processor, hf_tokenizer 17 | 18 | __all__ = tokenizer.__all__ + ["hf_processor", "hf_tokenizer"] 19 | -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We organize the prompts directly in the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answers in a particular format so that we can extract them. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | 19 | __all__ = ["RLHFDataset", "RMDataset", "SFTDataset"] 20 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import GPUMemoryLogger, log_gpu_memory_usage 16 | 17 | __all__ = ["GPUMemoryLogger", "log_gpu_memory_usage"] 18 | -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | 16 | import os 17 | 18 | 19 | def initialize_global_process_group(timeout_second=36000): 20 | from datetime import timedelta 21 | 22 | import torch.distributed 23 | 24 | torch.distributed.init_process_group("nccl", timeout=timedelta(seconds=timeout_second)) 25 | local_rank = int(os.environ["LOCAL_RANK"]) 26 | rank = int(os.environ["RANK"]) 27 | world_size = int(os.environ["WORLD_SIZE"]) 28 | 29 | if torch.distributed.is_initialized(): 30 | torch.cuda.set_device(local_rank) 31 | return local_rank, rank, world_size 32 | -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | 18 | import torch 19 | 20 | 21 | def set_basic_config(level): 22 | """ 23 | This function sets the global logging format and level. It will be called when import verl 24 | """ 25 | logging.basicConfig(format="%(levelname)s:%(asctime)s:%(message)s", level=level) 26 | 27 | 28 | def log_to_file(string): 29 | print(string) 30 | if os.path.isdir("logs"): 31 | with open(f"logs/log_{torch.distributed.get_rank()}", "a+") as f: 32 | f.write(string + "\n") 33 | -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | 17 | 18 | class MemoryBuffer: 19 | def __init__(self, numel, numel_padded, dtype): 20 | self.numel = numel 21 | self.numel_padded = numel_padded 22 | self.dtype = dtype 23 | self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=torch.cuda.current_device(), requires_grad=False) 24 | 25 | def zero(self): 26 | """Reset the buffer to zero.""" 27 | self.data.zero_() 28 | 29 | def get(self, shape, start_index): 30 | """Return a tensor with the input `shape` as a view into the 31 | 1-D data starting at `start_index`.""" 32 | end_index = start_index + shape.numel() 33 | assert end_index <= self.numel, "requested tensor is out of the buffer range." 34 | buffer_tensor = self.data[start_index:end_index] 35 | buffer_tensor = buffer_tensor.view(shape) 36 | return buffer_tensor 37 | -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from megatron.core.optimizer import OptimizerConfig 17 | from megatron.core.optimizer import get_megatron_optimizer as get_megatron_optimizer_native 18 | 19 | 20 | def get_megatron_optimizer( 21 | model, 22 | config: OptimizerConfig, 23 | no_weight_decay_cond=None, 24 | scale_lr_cond=None, 25 | lr_mult=1.0, 26 | ): 27 | # Base optimizer. 28 | return get_megatron_optimizer_native( 29 | config=config, 30 | model_chunks=model, 31 | no_weight_decay_cond=no_weight_decay_cond, 32 | scale_lr_cond=scale_lr_cond, 33 | lr_mult=lr_mult, 34 | ) 35 | 36 | 37 | # TODO: add get_optimizer_param_scheduler(optimizer) to implement lr scheuler. 38 | -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import reduce_metrics 16 | 17 | __all__ = ["reduce_metrics"] 18 | -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import concurrent.futures 19 | 20 | import ray 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | def put_data(index, data): 25 | return index, ray.put(data) 26 | 27 | if max_workers is None: 28 | max_workers = min(len(data_list), 16) 29 | 30 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 31 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 32 | res_lst = [] 33 | for future in concurrent.futures.as_completed(data_list_f): 34 | res_lst.append(future.result()) 35 | 36 | # reorder based on index 37 | output = [None for _ in range(len(data_list))] 38 | for res in res_lst: 39 | index, data_ref = res 40 | output[index] = data_ref 41 | 42 | return output 43 | -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | 17 | from mathruler.grader import extract_boxed_content, grade_answer 18 | 19 | 20 | def format_reward(predict_str: str) -> float: 21 | pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL) 22 | match_result = re.fullmatch(pattern, predict_str) 23 | return 1.0 if match_result else 0.0 24 | 25 | 26 | def acc_reward(predict_str: str, ground_truth: str) -> float: 27 | answer = extract_boxed_content(predict_str) 28 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 29 | 30 | 31 | def compute_score(predict_str: str, ground_truth: str) -> float: 32 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 33 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Individual Contributor: Mert Unsal 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .math import compute_score 16 | 17 | 18 | def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos): 19 | """ 20 | This is a demonstration of what a batched reward function should look like. 21 | Typically, you want to use a batched reward function to speed up scoring through parallelization. 22 | """ 23 | return [compute_score(solution_str, ground_truth) for solution_str, ground_truth in zip(solution_strs, ground_truths)] 24 | -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | try: 16 | from math_verify.errors import TimeoutException 17 | from math_verify.metric import math_metric 18 | from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig 19 | except ImportError: 20 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 21 | 22 | 23 | def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> float: 24 | verify_func = math_metric( 25 | gold_extraction_target=(LatexExtractionConfig(),), 26 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 27 | ) 28 | ret_score = 0.0 29 | 30 | # Wrap the ground truth in \boxed{} format for verification 31 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 32 | try: 33 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 34 | except TimeoutException: 35 | ret_score = timeout_score 36 | except Exception: 37 | pass 38 | 39 | return ret_score 40 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.3.1.dev 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | import torch 21 | 22 | from verl import DataProto 23 | 24 | __all__ = ["BasePPOCritic"] 25 | 26 | 27 | class BasePPOCritic(ABC): 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .batch import BatchRewardManager 16 | from .dapo import DAPORewardManager 17 | from .naive import NaiveRewardManager 18 | from .prime import PrimeRewardManager 19 | from .parallel import AsyncRewardManager 20 | 21 | __all__ = ["BatchRewardManager", "DAPORewardManager", "NaiveRewardManager", "PrimeRewardManager", "AsyncRewardManager"] 22 | -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | 17 | __all__ = ["BasePPORewardModel"] 18 | -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for reward model 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | from verl import DataProto 21 | 22 | 23 | class BasePPORewardModel(ABC): 24 | def __init__(self, config): 25 | self.config = config 26 | 27 | @abstractmethod 28 | def compute_reward(self, data: DataProto) -> DataProto: 29 | """Computing reward given input_ids. The transformers should output a tensor with shape 30 | [batch_size, sequence_length], and the value at [EOS] mask should be gathered. 31 | 32 | Args: 33 | data: must contain keys "input_ids", "attention_mask" and "position_ids". 34 | - input_ids: [batch_size, sequence_length] 35 | - attention_mask: [batch_size, sequence_length] 36 | - position_ids: [batch_size, sequence_length] 37 | 38 | Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward. 39 | Other position should have zero reward. Note that this may change in the future if we use 40 | dense reward. So, we leave the interface for general case. 41 | - reward: [batch_size, sequence_length]. 42 | 43 | """ 44 | pass 45 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
-------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | 17 | __all__ = ["MegatronRewardModel"] 18 |
-------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .hf_rollout import HFRollout 17 | from .naive import NaiveRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 |
-------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | 17 | from verl import DataProto 18 | 19 | __all__ = ["BaseRollout"] 20 | 21 | 22 | class BaseRollout(ABC): 23 | def __init__(self): 24 | """ 25 | 26 | Args: 27 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 28 | should handle termination when training stops. 29 | """ 30 | super().__init__() 31 | 32 | @abstractmethod 33 | def generate_sequences(self, prompts: DataProto) -> DataProto: 34 | """Generate sequences""" 35 | pass 36 |
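The rollout interface is intentionally small: a rollout takes a `DataProto` of prompts and returns a `DataProto` of generated sequences. A trivial, hypothetical subclass that only illustrates this contract (it is not one of the shipped backends such as `HFRollout` or the vLLM/SGLang rollouts) might look like:

```python
from verl import DataProto
from verl.workers.rollout.base import BaseRollout


class EchoRollout(BaseRollout):
    """Toy rollout that returns the prompts unchanged, for interface illustration only."""

    def generate_sequences(self, prompts: DataProto) -> DataProto:
        # A real backend would run the policy model here and append the generated
        # responses (token ids, log-probs, attention masks) to the returned DataProto.
        return prompts
```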
-------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | 17 | __all__ = ["NaiveRollout"] 18 |
-------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .async_sglang_rollout import AsyncSGLangRollout 16 | from .sglang_rollout import SGLangRollout 17 | 18 | __all__ = ["AsyncSGLangRollout", "SGLangRollout"] 19 |
-------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |
-------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __enter__(self): 23 | pass 24 | 25 | def __exit__(self, exc_type, exc_value, traceback): 26 | pass 27 | 28 | def preprocess_data(self, data: DataProto) -> DataProto: 29 | return data 30 | 31 | def postprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 |
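A sharding manager is driven as a context manager around rollout generation: entering the context is where a concrete subclass reshards or reloads weights into the inference engine's layout, and the pre/post hooks convert the data between training and inference layouts. The following sketch shows that calling pattern under stated assumptions; the function name and the `rollout` object are illustrative, not a verbatim excerpt from verl's trainer.

```python
from verl import DataProto
from verl.workers.sharding_manager.base import BaseShardingManager


def rollout_with_resharding(sharding_manager: BaseShardingManager, rollout, prompts: DataProto) -> DataProto:
    # Entering the context is where a concrete manager gathers / reshards the
    # training weights into the inference engine before generation starts.
    with sharding_manager:
        prompts = sharding_manager.preprocess_data(prompts)   # e.g. all-gather prompts across DP ranks
        output = rollout.generate_sequences(prompts)
        output = sharding_manager.postprocess_data(output)    # e.g. chunk results back per rank
    return output
```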
-------------------------------------------------------------------------------- /webui/README.md: -------------------------------------------------------------------------------- 1 | # RL Factory WebUI 2 | 3 | This is the Web User Interface for RL Factory, built with Gradio. The interface provides an intuitive way to manage all aspects of reinforcement learning experiments. 4 | 5 | ## Feature Modules 6 | 7 | The WebUI includes the following five main modules: 8 | 9 | 1. **Data Processing** – For managing and processing experiment data 10 | 2. **Tool Definition** – For defining and managing experiment tools 11 | 3. **Reward Definition** – For defining and managing reward functions and graders 12 | 4. **Training & Deployment** – For training models and deploying experiments 13 | 5. **Project Management** – For managing experiment projects and resources 14 | 15 | ## Installation 16 | 17 | 1. Make sure all dependencies are installed: 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 2. Run the application: 23 | ```bash 24 | python app.py 25 | ``` 26 | 27 | The application will start at http://localhost:7860. 28 | 29 | ## Development Notes 30 | 31 | - Each feature module is implemented as a separate tab in `app.py` 32 | - The interface is built using Gradio's Blocks API 33 | - All components support real-time updates and interaction 34 | 35 | ## Notes 36 | 37 | - Ensure all necessary dependencies are installed before running the application 38 | - The default port is 7860, which can be modified in `app.py` 39 | - Debug mode is enabled during development; please disable it for production deployment
-------------------------------------------------------------------------------- /webui/app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from tabs import ( 3 | create_data_processing_tab, 4 | create_tool_definition_tab, 5 | create_reward_definition_tab, 6 | create_training_deployment_tab, 7 | create_project_management_tab 8 | ) 9 | 10 | def create_app(): 11 | """Create the main application 12 | 13 | Assembles all tab modules into the complete WebUI application. 14 | Each tab is an independent module, which keeps it easy to maintain and extend. 15 | """ 16 | with gr.Blocks(title="RL Factory WebUI") as app: 17 | gr.Markdown("# RL Factory WebUI") 18 | gr.Markdown(""" 19 | Welcome to the RL Factory WebUI, a tool for managing reinforcement learning experiments. 20 | """) 21 | 22 | with gr.Tabs() as tabs: 23 | with gr.TabItem("Data Processing"): 24 | create_data_processing_tab() 25 | with gr.TabItem("Tool Definition"): 26 | create_tool_definition_tab() 27 | with gr.TabItem("Reward Definition"): 28 | create_reward_definition_tab() 29 | with gr.TabItem("Training & Deployment"): 30 | create_training_deployment_tab() 31 | with gr.TabItem("Project Management"): 32 | create_project_management_tab() 33 | 34 | return app 35 | 36 | if __name__ == "__main__": 37 | app = create_app() 38 | app.launch( 39 | server_name="0.0.0.0", 40 | server_port=7860, 41 | share=False, 42 | debug=True 43 | )
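As the README's development notes say, each feature module is a standalone tab wired into `app.py`. A minimal sketch of adding a new tab following that pattern is shown below; the module name `evaluation.py`, the function `create_evaluation_tab`, and the tab contents are hypothetical, not existing code.

```python
# webui/tabs/evaluation.py  (hypothetical new module, mirroring the existing tab pattern)
import gradio as gr

def create_evaluation_tab():
    """Evaluation tab (illustrative only)."""
    with gr.Blocks() as tab:
        gr.Markdown("# Evaluation")
        gr.Markdown("Compare checkpoints and inspect reward curves here.")
    return tab

# To register it:
# 1. export it from webui/tabs/__init__.py:
#        from .evaluation import create_evaluation_tab
# 2. add it inside the gr.Tabs() block in webui/app.py:
#        with gr.TabItem("Evaluation"):
#            create_evaluation_tab()
```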
-------------------------------------------------------------------------------- /webui/components/rewards/graders/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict, Type, Any 3 | 4 | 5 | class BaseGrader(ABC): 6 | """Base class for reward graders""" 7 | 8 | @abstractmethod 9 | def grade(self, prediction: Any, reference: Any) -> float: 10 | """Grade a prediction 11 | 12 | Args: 13 | prediction: the predicted value 14 | reference: the reference value 15 | 16 | Returns: 17 | float: the score (between 0 and 1) 18 | """ 19 | pass 20 | 21 | @property 22 | @abstractmethod 23 | def name(self) -> str: 24 | """Grader name""" 25 | pass 26 | 27 | @property 28 | @abstractmethod 29 | def description(self) -> str: 30 | """Grader description""" 31 | pass 32 | 33 | 34 | class GraderRegistry: 35 | """Registry of graders""" 36 | 37 | _registry: Dict[str, Type[BaseGrader]] = {} 38 | 39 | @classmethod 40 | def register(cls, grader_class: Type[BaseGrader]) -> Type[BaseGrader]: 41 | """Register a grader 42 | 43 | Args: 44 | grader_class: the grader class 45 | 46 | Returns: 47 | the registered grader class 48 | """ 49 | cls._registry[grader_class.name] = grader_class 50 | return grader_class 51 | 52 | @classmethod 53 | def get(cls, name: str) -> Type[BaseGrader]: 54 | """Get a grader by name 55 | 56 | Args: 57 | name: the grader name 58 | 59 | Returns: 60 | the grader class 61 | """ 62 | if name not in cls._registry: 63 | raise KeyError(f"No grader named {name} is registered") 64 | return cls._registry[name] 65 | 66 | @classmethod 67 | def list_graders(cls) -> Dict[str, str]: 68 | """List all registered graders 69 | 70 | Returns: 71 | Dict[str, str]: a mapping from grader name to description 72 | """ 73 | return {name: grader.description for name, grader in cls._registry.items()} 74 |
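A short usage sketch of the registry follows. `ExactMatchGrader` is hypothetical; note that `GraderRegistry.register` reads `grader_class.name` on the class itself, so the example defines `name` and `description` as plain class attributes (rather than properties) so that the class-level lookup yields strings. The import path assumes the `webui/` directory is the working directory, as when running `python3 app.py`.

```python
from components.rewards.graders import BaseGrader, GraderRegistry  # assumed import path


@GraderRegistry.register
class ExactMatchGrader(BaseGrader):
    # Plain class attributes so grader_class.name / .description are real strings
    # when register() indexes the registry on the class object.
    name = "exact_match"
    description = "Returns 1.0 if the prediction matches the reference exactly, else 0.0"

    def grade(self, prediction, reference) -> float:
        return 1.0 if str(prediction).strip() == str(reference).strip() else 0.0


grader_cls = GraderRegistry.get("exact_match")
print(grader_cls().grade("42", "42"))   # 1.0
print(GraderRegistry.list_graders())    # {'exact_match': 'Returns 1.0 if ...'}
```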
-------------------------------------------------------------------------------- /webui/components/rewards/graders/graders.py: -------------------------------------------------------------------------------- 1 | """Grader import module; importing it ensures all graders are registered.""" 2 | 3 | from .qwen_math import QwenMathGrader 4 | 5 | __all__ = ['QwenMathGrader']
-------------------------------------------------------------------------------- /webui/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.19.2 2 | fastapi>=0.109.0 3 | uvicorn>=0.27.0 4 | python-multipart>=0.0.9 5 | pydantic>=2.6.1
-------------------------------------------------------------------------------- /webui/run_webui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Resolve the absolute path of the directory containing this script 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | # Install dependencies 8 | # echo "Installing dependencies..." 9 | # pip install -r requirements.txt 10 | 11 | # Launch the WebUI 12 | echo "Starting RL Factory WebUI..." 13 | echo "The service will start at http://localhost:7860" 14 | echo "Press Ctrl+C to stop the service" 15 | 16 | python3 app.py
-------------------------------------------------------------------------------- /webui/tabs/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_processing import create_data_processing_tab 2 | from .tool_definition import create_tool_definition_tab 3 | from .reward_definition import create_reward_definition_tab 4 | from .training_deployment import create_training_deployment_tab 5 | from .project_management import create_project_management_tab 6 | 7 | __all__ = [ 8 | 'create_data_processing_tab', 9 | 'create_tool_definition_tab', 10 | 'create_reward_definition_tab', 11 | 'create_training_deployment_tab', 12 | 'create_project_management_tab' 13 | ]
-------------------------------------------------------------------------------- /webui/tabs/data_processing.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_data_processing_tab(): 4 | """Data Processing tab 5 | 6 | This tab is used to manage and process experiment data, including: 7 | - Data import and export 8 | - Data preprocessing 9 | - Data visualization 10 | - Dataset management 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Data Processing") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Import and export experiment data 18 | - Preprocess and transform data 19 | - View data visualization results 20 | - Manage experiment datasets 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/project_management.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_project_management_tab(): 4 | """Project Management tab 5 | 6 | This tab is used to manage experiment projects and resources, including: 7 | - Project management 8 | - Resource monitoring 9 | - Experiment records 10 | - Result analysis 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Project Management") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Manage experiment projects 18 | - Monitor system resources 19 | - Record experiment runs 20 | - Analyze experiment results 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/tool_definition.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_tool_definition_tab(): 4 | """Tool Definition tab 5 | 6 | This tab is used to define and manage experiment tools, including: 7 | - Tool configuration 8 | - Tool registration 9 | - Tool testing 10 | - Tool documentation 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Tool Definition") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Configure the tools required for experiments 18 | - Register new tools 19 | - Test tool functionality 20 | - View tool documentation 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /webui/tabs/training_deployment.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_training_deployment_tab(): 4 | """Training & Deployment tab 5 | 6 | This tab is used to train models and deploy experiments, including: 7 | - Training configuration 8 | - Training monitoring 9 | - Model deployment 10 | - Experiment evaluation 11 | """ 12 | with gr.Blocks() as tab: 13 | gr.Markdown("# Training & Deployment") 14 | gr.Markdown(""" 15 | ## Features 16 | In this tab you can: 17 | - Configure training parameters 18 | - Monitor the training process 19 | - Deploy trained models 20 | - Evaluate experiment results 21 | """) 22 | # Concrete content will be added here later 23 | return tab
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_11d607b7-be32-4947-9087-88f808616b56_30594.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 46189, 3 | "iopub_port": 34123, 4 | "stdin_port": 36193, 5 | "control_port": 32895, 6 | "hb_port": 36215, 7 | "ip": "127.0.0.1", 8 | "key": "e2c14147-c168d7a4bfbd70bf7c43dc9b", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 38489, 3 | "iopub_port": 37727, 4 | "stdin_port": 37469, 5 | "control_port": 34083, 6 | "hb_port": 35269, 7 | "ip": "127.0.0.1", 8 | "key": "2863fae1-3c42df3283f8ef7a517e338f", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 44029, 3 | "iopub_port": 41935, 4 | "stdin_port": 42815, 5 | "control_port": 34975, 6 | "hb_port": 49955, 7 | "ip": "127.0.0.1", 8 | "key": "72406bee-dde69fc444aa49e9fb53a771", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | }
-------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 33685, 3 | "iopub_port": 33521, 4 | "stdin_port": 46845, 5 | "control_port": 33759, 6 | "hb_port": 46539, 7 | "ip": "127.0.0.1", 8 | "key": "c1593018-1aaacb1c0422abc6cae4a134", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 |
"kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_67ad306a-e335-4294-b241-514085b015a3_6550.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 39877, 3 | "iopub_port": 36447, 4 | "stdin_port": 45291, 5 | "control_port": 46463, 6 | "hb_port": 55285, 7 | "ip": "127.0.0.1", 8 | "key": "75a02734-a0b6dea9b859d2e45f8d29a1", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 35669, 3 | "iopub_port": 42719, 4 | "stdin_port": 39277, 5 | "control_port": 34731, 6 | "hb_port": 37443, 7 | "ip": "127.0.0.1", 8 | "key": "771f30ef-c013140eaf7f6fa165404952", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 35535, 3 | "iopub_port": 40593, 4 | "stdin_port": 34717, 5 | "control_port": 44503, 6 | "hb_port": 40187, 7 | "ip": "127.0.0.1", 8 | "key": "fa3bde5d-b9b9e82f4d83612ec4f29d20", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/kernel_connection_file_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.json: -------------------------------------------------------------------------------- 1 | { 2 | "shell_port": 38917, 3 | "iopub_port": 44457, 4 | "stdin_port": 36495, 5 | "control_port": 41571, 6 | "hb_port": 58689, 7 | "ip": "127.0.0.1", 8 | "key": "ae264de6-d8099f5dedac85249db6aafe", 9 | "transport": "tcp", 10 | "signature_scheme": "hmac-sha256", 11 | "kernel_name": "" 12 | } -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_11d607b7-be32-4947-9087-88f808616b56_30594.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_14fc8265-c6ad-4e2c-9645-f2b288de818e_187821.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_55b220ca-b6ee-48b1-8c81-c838606d1c12_22154.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_5ca68dbe-c08c-42d4-93f8-e634bd09997f_38112.py: -------------------------------------------------------------------------------- 1 | 2 
| from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_67ad306a-e335-4294-b241-514085b015a3_6550.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_816fc0a1-50c8-4c81-9977-90c3593d5a04_58250.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_891508c0-5e4b-4f0a-82c4-6e1f91d9a69b_14398.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | -------------------------------------------------------------------------------- /workspace/tools/code_interpreter/launch_kernel_8f23a421-203b-4a73-ad0e-9926fe8aaf11_45987.py: -------------------------------------------------------------------------------- 1 | 2 | from ipykernel import kernelapp as app 3 | app.launch_new_instance() 4 | --------------------------------------------------------------------------------
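The `kernel_connection_file_*.json` files above are standard Jupyter kernel connection files (ZMQ ports, HMAC key, transport), and each `launch_kernel_*.py` simply starts an ipykernel instance. A hedged sketch of how such a kernel could be driven from Python with `jupyter_client` is shown below; the connection-file path is illustrative, and this is not how RL Factory's code interpreter tool is necessarily implemented.

```python
from jupyter_client import BlockingKernelClient

client = BlockingKernelClient()
# Point the client at one of the connection files written when the kernel was launched.
client.load_connection_file("workspace/tools/code_interpreter/kernel_connection_file_<id>.json")
client.start_channels()

# Execute code on the already-running kernel and wait for the reply.
reply = client.execute_interactive("print(1 + 1)")
print(reply["content"]["status"])  # "ok" on success
```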