├── .gemini └── config.yaml ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── .deprecate │ ├── e2e_eval_aime24.yml │ ├── e2e_ppo_trainer.yml │ ├── e2e_ppo_trainer_megatron_sglang.yml │ ├── e2e_prime.yml │ ├── e2e_spin.yml │ └── e2e_sppo.yml │ ├── README.md │ ├── check-pr-title.yml │ ├── checkpoint_converter.yml │ ├── cpu_unit_tests.yml │ ├── doc.yml │ ├── docker-build-ascend-a2.yml │ ├── docker-build-ascend-a3.yml │ ├── e2e_ascend.yml │ ├── e2e_dapo.yml │ ├── e2e_fully_async_policy.yml │ ├── e2e_genrm_remote.yml │ ├── e2e_one_step_off_policy.yml │ ├── e2e_ppo_trainer.yml │ ├── e2e_ppo_trainer_megatron_sglang.yml │ ├── e2e_ppo_trainer_megatron_sglang_2.yml │ ├── e2e_ppo_trainer_megatron_vllm.yml │ ├── e2e_ppo_trainer_megatron_vllm_2.yml │ ├── e2e_sft.yml │ ├── e2e_transferqueue.yml │ ├── gpu_unit_tests.yml │ ├── model.yml │ ├── pre-commit.yml │ ├── reward_model_sglang.yml │ ├── reward_model_vllm.yml │ ├── sanity.yml │ ├── scorecard.yml │ ├── secrets_scan.yml │ ├── sgl.yml │ ├── type-coverage-check.yml │ └── vllm.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .vscode └── settings.json ├── CONTRIBUTING.md ├── LICENSE ├── Notice.txt ├── README.md ├── docker ├── Dockerfile.isaaclab230 ├── Dockerfile.stable.sglang ├── Dockerfile.stable.vllm ├── README.md ├── ascend │ ├── Dockerfile.ascend_8.2.rc1_a2 │ ├── Dockerfile.ascend_8.2.rc1_a3 │ ├── Dockerfile.ascend_8.3.rc1_a2 │ └── Dockerfile.ascend_8.3.rc1_a3 ├── aws │ ├── Dockerfile.extention.awsefa │ └── Dockerfile.ngc.vllm0.8.sagemaker ├── rocm │ ├── Apptainerfile.rocm │ ├── Dockerfile.rocm │ ├── Dockerfile.rocm7 │ ├── Dockerfile.rocm_verl-0.3.0.post1 │ └── Dockerfile.rocm_verl-0.4.1 ├── verl0.4-cu124-torch2.6-fa2.7.4 │ ├── Dockerfile.app.sglang.vllm.mcore0.12 │ ├── Dockerfile.app.sglang.vllm.mcore0.12.deepep │ ├── Dockerfile.app.sglang.vllm.mcore0.13.preview │ ├── Dockerfile.app.vllm.mcore0.12 │ ├── Dockerfile.app.vllm.mcore0.12.deepep │ ├── Dockerfile.app.vllm.mcore0.13.preview │ ├── Dockerfile.base │ └── README.md ├── verl0.5-cu126-torch2.7-fa2.7.4 │ ├── Dockerfile.app.sglang0.4.10.post2.mcore0.13 │ ├── Dockerfile.app.sglang0.4.9.post6.mcore0.13 │ ├── Dockerfile.app.vllm.mcore0.13 │ ├── Dockerfile.app.vllm.mcore0.15 │ ├── Dockerfile.base.torch2.7.1 │ └── README.md ├── verl0.5-cu126-torch2.7.1-fa2.8.0 │ ├── Dockerfile.app.sglang.mcore0.12 │ ├── Dockerfile.app.sglang.mcore0.13.preview │ ├── Dockerfile.base │ └── README.md ├── verl0.5-preview-cu128-torch2.7.1-fa2.8.0 │ ├── Dockerfile.app.sglang.megatron │ ├── Dockerfile.base │ └── README.md └── verl0.6-cu128-torch2.8.0-fa2.7.4 │ ├── Dockerfile.app.sglang │ ├── Dockerfile.base │ └── Dockerfile.vllm011.mcore_gpt-oss ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ ├── custom.css │ ├── js │ │ ├── resizable-sidebar.js │ │ └── runllm-widget.js │ └── logo.png ├── advance │ ├── agent_loop.rst │ ├── async-on-policy-distill.md │ ├── attention_implementation.rst │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fp8.md │ ├── fsdp_extension.rst │ ├── fully_async.md │ ├── grafana_prometheus.md │ ├── megatron_extension.rst │ ├── one_step_off.md │ ├── placement.rst │ ├── ppo_lora.rst │ ├── reward_loop.rst │ ├── rollout_skip.rst │ ├── rollout_trace.rst │ └── rope.rst ├── algo │ ├── baseline.md │ ├── collabllm.md │ ├── dapo.md │ ├── entropy.md │ ├── gpg.md │ ├── grpo.md │ ├── opo.md │ ├── ppo.md │ ├── rollout_corr.md │ ├── rollout_corr_math.md │ ├── spin.md │ └── sppo.md ├── amd_tutorial │ ├── amd_build_dockerfile_page.rst │ └── amd_vllm_page.rst ├── api │ ├── data.rst │ ├── single_controller.rst │ ├── trainer.rst │ └── utils.rst ├── ascend_tutorial │ ├── ascend_consistency.rst │ ├── ascend_profiling_en.rst │ ├── ascend_profiling_zh.rst │ ├── ascend_quick_start.rst │ ├── ascend_sglang_quick_start.rst │ └── dockerfile_build_guidance.rst ├── conf.py ├── data │ └── transfer_queue.md ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ ├── multi_modal_example.rst │ ├── ppo_code_architecture.rst │ ├── sandbox_fusion_example.rst │ └── skypilot_examples.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ ├── best_practices.rst │ ├── device_tuning.rst │ ├── dpsk.md │ ├── nsight_profiling.md │ ├── perf_tuning.rst │ └── verl_profiler_system.md ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── sglang_multiturn │ ├── interaction_system.rst │ ├── multiturn.rst │ ├── sandbox_fusion.rst │ └── search_tool_example.rst ├── single_controller.rst ├── start │ ├── agentic_rl.rst │ ├── install.rst │ ├── more_resources.rst │ ├── multinode.rst │ ├── quickstart.rst │ └── ray_debug_tutorial.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ ├── model_engine.rst │ ├── ray_trainer.rst │ └── sglang_worker.rst ├── examples ├── data_preprocess │ ├── aime2024_multiturn_w_tool.py │ ├── dapo_multiturn_w_tool.py │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── geo3k_multiturn_w_tool.py │ ├── gsm8k.py │ ├── gsm8k_multiturn_sft.py │ ├── gsm8k_multiturn_w_interaction.py │ ├── gsm8k_multiturn_w_tool.py │ ├── gsm8k_tool_agent_loop.py │ ├── hellaswag.py │ ├── math_dataset.py │ ├── multiturn.py │ └── preprocess_search_r1_dataset.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── gmpo_trainer │ ├── README.md │ ├── run_qwen2_5-7b_math.sh │ ├── test_dapo_7b_math.sh │ └── test_dapo_qwen3_30b_math.sh ├── gpg_trainer │ ├── gpg.md │ ├── run_qwen2-7b_math.sh │ └── run_qwen2-7b_math_megatron.sh ├── grpo_trainer │ ├── README.md │ ├── run_deepseek671b_math_megatron_80gb.sh │ ├── run_deepseek671b_math_megatron_96gb.sh │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_glm41v_9b.sh │ ├── run_gptoss_20b.sh │ ├── run_minicpmo2_6.sh │ ├── run_mistral13b_skyworkrm_hhrlhf.sh │ ├── run_moonlight16b_math_megatron.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_math_megatron_lora.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_seq_balance_math_megatron.sh │ ├── run_qwen2-7b_sgl_megatron.sh │ ├── run_qwen2_5-3b_gsm8k_grpo_lora.sh │ ├── run_qwen2_5-3b_gsm8k_grpo_lora_from_adapter.sh │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ ├── run_qwen2_5_32b_grpo_npu.sh │ ├── run_qwen2_5_7b_grpo_discrete_prof_npu.sh │ ├── run_qwen2_5_7b_grpo_e2e_prof_npu.sh │ ├── run_qwen2_5_7b_grpo_npu.sh │ ├── run_qwen2_5_vl-7b-megatron.sh │ ├── run_qwen2_5_vl-7b-sglang.sh │ ├── run_qwen2_5_vl-7b.sh │ ├── run_qwen2_5_vl-7b_freeze_vision.sh │ ├── run_qwen2_5_vl-7b_lora.sh │ ├── run_qwen2_5_vl-7b_seq_balance.sh │ ├── run_qwen2_5_vl_32b_npu.sh │ ├── run_qwen2_5_vl_3b_npu.sh │ ├── run_qwen2_5_vl_7b_npu.sh │ ├── run_qwen3-235b_megatron_96gb.sh │ ├── run_qwen3-32b_npu.sh │ ├── run_qwen3-8b.sh │ ├── run_qwen3-8b_npu.sh │ ├── run_qwen3_8b_grpo_sglang_1k_spmd_npu.sh │ ├── run_qwen3_8b_grpo_sglang_32k_spmd_npu.sh │ ├── run_qwen3_vl-235b-megatron.sh │ ├── run_qwen3_vl-30b-megatron.sh │ ├── run_qwen3_vl-8b-megatron.sh │ ├── run_qwen3moe-30b_megatron_96gb.sh │ ├── run_qwen3moe-30b_megatron_lora.sh │ └── run_seed_oss_36b.sh ├── gspo_trainer │ ├── run_qwen30b_gspo.sh │ ├── test_gspo_3b_math.sh │ ├── test_gspo_3b_math_slurm.sh │ └── test_gspo_qwen30b_a3b_ep.sh ├── ppo_trainer │ ├── README.md │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_pfppo.sh │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_math_gsm8k_megatron_nsys.sh │ ├── run_gemma.sh │ ├── run_moonlight16b_a3b_gsm8k_megatron.sh │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_rm_seq_balance_fused_kernels.sh │ ├── run_qwen2-7b_rm_seq_balance_nsys.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── run_qwen3-8b_npu.sh ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── rollout_correction │ └── run_with_rollout_corr.sh ├── router_replay │ ├── README.md │ └── run_qwen30_a3b_megatron_vllm.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen3_8b_sft_peft_sp2_npu.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ ├── run_qwen_05_sp2_liger.sh │ │ └── run_seed_oss_36b_sft.sh │ └── multiturn │ │ └── run_qwen_05_sp2.sh ├── sglang_multiturn │ ├── README.md │ ├── config │ │ ├── geo3k_multiturn_grpo.yaml │ │ ├── geo3k_multiturn_megatron_grpo.yaml │ │ ├── gsm8k_multiturn_grpo.yaml │ │ ├── gsm8k_multiturn_grpo_server.yaml │ │ ├── gsm8k_multiturn_grpo_w_interaction.yaml │ │ ├── gsm8k_multiturn_megatron_grpo.yaml │ │ ├── interaction_config │ │ │ └── gsm8k_interaction_config.yaml │ │ ├── retool_multiturn_grpo.yaml │ │ ├── search_multiturn_grpo.yaml │ │ ├── search_multiturn_grpo_one_step_off.yaml │ │ └── tool_config │ │ │ ├── geo3k_tool_config.yaml │ │ │ ├── gsm8k_tool_config.yaml │ │ │ ├── mcp_server.json │ │ │ ├── mcp_tool_config.yaml │ │ │ ├── sandbox_fusion_tool_config.yaml │ │ │ └── search_tool_config.yaml │ ├── geo3k │ │ ├── run_qwen2.5-3b_geo3k_multiturn.sh │ │ ├── run_qwen2.5-3b_geo3k_multiturn_4xgpu.sh │ │ └── run_qwen2.5-3b_megatron_geo3k_multiturn.sh │ ├── run_qwen0.5b_gsm8k_multiturn_curriculum.sh │ ├── run_qwen2.5-0.5b_gsm8k_multiturn_w_interaction.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn_4xgpu_server.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn_server.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn_vllm_fsdp.sh │ ├── run_qwen2.5-3b_gsm8k_tool_agent_mlflow.sh │ ├── run_qwen2.5-3b_megatron_gsm8k_multiturn.sh │ ├── run_qwen3-4b_gsm8k_multiturn.sh │ ├── run_qwen3_4b_dapo_multiturn.sh │ └── search_r1_like │ │ ├── local_dense_retriever │ │ ├── download.py │ │ └── retrieval_server.py │ │ └── run_qwen2.5-3b_instruct_search_multiturn.sh ├── skypilot │ ├── README.md │ ├── verl-grpo.yaml │ ├── verl-multiturn-tools.yaml │ └── verl-ppo.yaml ├── slurm │ └── ray_on_slurm.slurm ├── split_placement │ ├── README.md │ ├── config │ │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── tuning │ ├── 0.5b │ │ └── qwen2-0.5b_grpo-lora_1_h100_fsdp_vllm.sh │ ├── 1.5b │ │ └── qwen2-1.5b_grpo-lora_1_h100_fsdp_vllm.sh │ ├── 14b │ │ ├── qwen2-14b_grpo-lora_2_h100_fsdp_vllm.sh │ │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ ├── 32b │ │ ├── qwen2-32b_grpo-lora_4_h100_fsdp_vllm.sh │ │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ ├── 3b │ │ └── qwen2-3b_grpo-lora_1_h100_fsdp_vllm.sh │ ├── 70b │ │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ │ ├── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ │ └── qwen2-72b_grpo-lora_8_h100_fsdp_vllm.sh │ └── 7b │ │ ├── qwen2-7b_grpo-lora_1_h100_fsdp_vllm.sh │ │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh └── tutorial │ └── agent_loop_get_started │ ├── agent_loop_tutorial.ipynb │ └── sandbox.py ├── pyproject.toml ├── recipe ├── README.md ├── collabllm │ ├── README.md │ ├── collabllm_agent_loop.py │ ├── collabllm_interation.py │ ├── config │ │ ├── agent.yaml │ │ └── collabllm_interaction_config.yaml │ ├── metrics │ │ ├── accuracy.py │ │ ├── bleu_score.py │ │ ├── interactivity.py │ │ ├── pass_rate.py │ │ └── token_amount.py │ ├── process_dataset.py │ ├── reward_function.py │ ├── train_rl_collabllm.sh │ ├── train_sft_collabllm.sh │ └── utils.py ├── dapo │ ├── README.md │ ├── config │ │ ├── dapo_megatron_trainer.yaml │ │ └── dapo_trainer.yaml │ ├── dapo_ray_trainer.py │ ├── main_dapo.py │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b_npu.sh │ ├── run_dapo_qwen2.5_32b_rollout_corr.sh │ ├── run_dapo_qwen2.5_7b_npu.sh │ ├── run_dapo_qwen3_14b_base_npu.sh │ ├── run_dapo_qwen3_8b_base_npu.sh │ ├── run_dapo_qwen3_moe_30b_base_fsdp_npu.sh │ ├── run_dapo_qwen3_moe_30b_megatron_npu.sh │ ├── run_dapo_qwen3_moe_30b_vllm_fp8_rollout.sh │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ ├── runtime_env.yaml │ ├── test_dapo_7b.sh │ ├── test_dapo_7b_math.sh │ ├── test_dapo_7b_math_lora.sh │ ├── test_dapo_7b_math_megatron.sh │ ├── test_dapo_8b_megatron_fp16.sh │ ├── test_dapo_8b_megatron_fp8train.sh │ ├── test_dapo_dspk_671b_megatron_96gb.sh │ ├── test_dapo_glm_air_megatron.sh │ ├── test_dapo_gptoss_20b_megatron.sh │ ├── test_dapo_qwen3_30b_math.sh │ ├── test_dapo_qwen3_30b_math_single_node.sh │ └── test_dapo_qwen3_moe_30b_megatron_fp16.sh ├── deepeyes │ ├── README.md │ ├── configs │ │ ├── deepeyes_multiturn_grpo.yaml │ │ └── image_zoom_in_tool_config.yaml │ ├── deepeyes.py │ └── run_deepeyes_grpo.sh ├── entropy │ ├── 32b_clip_cov.sh │ ├── 32b_kl_cov.sh │ ├── 32b_kl_cov_mininbsz.sh │ ├── 7b_clip_cov.sh │ ├── 7b_kl_cov.sh │ ├── README.md │ ├── config │ │ └── entropy_trainer.yaml │ ├── entropy_ray_trainer.py │ ├── main_entropy.py │ ├── reward.py │ └── reward_score │ │ ├── __init__.py │ │ └── entropy_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py ├── fapo │ ├── README.md │ ├── config │ │ └── rm_config.yaml │ ├── prepare_fapo_data.py │ ├── reward_fn_genrm.py │ ├── reward_fn_reasoning.py │ ├── reward_fn_reasoning_remote.py │ ├── run_baseline_32b.sh │ ├── run_baseline_7b.sh │ ├── run_fapo_32b.sh │ ├── run_fapo_32b_remote.sh │ ├── run_fapo_7b.sh │ ├── run_fapo_7b_remote.sh │ ├── run_fapo_genrm_train.sh │ └── runtime_env.yaml ├── flowrl │ ├── FLOWRL_SIMPLE_GUIDE.md │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── flowrl_trainer.yaml │ ├── figures │ │ ├── file.svg │ │ ├── flowrl.pdf │ │ └── flowrl.png │ ├── flowrl_actor.py │ ├── flowrl_fsdp_worker.py │ ├── flowrl_ray_trainer.py │ ├── main_flowrl.py │ ├── prepare │ │ ├── prepare_data.sh │ │ └── prepare_model.sh │ └── run_flowrl_qwen2.5_7b.sh ├── fully_async_policy │ ├── README.md │ ├── README_zh.md │ ├── agent_loop │ │ ├── __init__.py │ │ ├── agent_loop.py │ │ ├── partial_single_turn_agent_loop.py │ │ └── partial_tool_agent_loop.py │ ├── config │ │ ├── fully_async_ppo_megatron_trainer.yaml │ │ └── fully_async_ppo_trainer.yaml │ ├── detach_utils.py │ ├── fsdp2_utils.py │ ├── fsdp_workers.py │ ├── fully_async_main.py │ ├── fully_async_rollouter.py │ ├── fully_async_trainer.py │ ├── megatron_utils.py │ ├── megatron_worker.py │ ├── message_queue.py │ ├── param_sync.py │ ├── ray_trainer.py │ ├── shell │ │ ├── dapo_7b_async_retool.sh │ │ ├── dapo_7b_math_fsdp2_16_16.sh │ │ ├── dapo_7b_math_fsdp2_32_32.sh │ │ ├── dapo_7b_math_fsdp2_4_12.sh │ │ ├── dapo_7b_math_fsdp2_4_4.sh │ │ ├── dapo_7b_math_fsdp2_64_64.sh │ │ ├── dapo_7b_math_fsdp2_64_64_mis.sh │ │ ├── dapo_7b_math_fsdp2_8_8.sh │ │ ├── geo3k_qwen25vl_7b_megatron_4_4.sh │ │ ├── grpo_30b_a3b_base_math_megatron_96_32.sh │ │ ├── grpo_30b_a3b_base_math_megatron_96_32_mis.sh │ │ └── runtime_env.yaml │ ├── unittest │ │ └── simple_streaming_demo.py │ └── vllm_rollout │ │ ├── __init__.py │ │ └── vllm_async_server.py ├── genrm_remote │ ├── README.md │ ├── reward_function.py │ └── run_genrm_remote.sh ├── gkd │ ├── README.md │ ├── config │ │ ├── on_policy_distill_trainer.yaml │ │ └── runtime_env.yaml │ ├── main_gkd.py │ ├── megatron_kl_loss.py │ ├── megatron_utils.py │ ├── megatron_workers.py │ ├── ray_trainer.py │ ├── run_moonlight_dsv3_training.sh │ ├── teacher │ │ ├── __init__.py │ │ ├── client.py │ │ ├── join_server.sh │ │ ├── proxy.py │ │ ├── start_server.sh │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── worker.py │ ├── teacher_utils.py │ ├── test_qwen.sh │ ├── test_qwen_sglang.sh │ └── test_teacher_server.py ├── infigui-g1 │ ├── README.md │ ├── reward_fn.py │ ├── run_3b.sh │ └── run_7b.sh ├── minicpmo │ └── rl_dataset.py ├── one_step_off_policy │ ├── README.md │ ├── agent_loop │ │ ├── __init__.py │ │ └── agent_loop.py │ ├── config │ │ ├── one_step_off_ppo_megatron_trainer.yaml │ │ └── one_step_off_ppo_trainer.yaml │ ├── distributed_util.py │ ├── fsdp_workers.py │ ├── main_ppo.py │ ├── megatron_workers.py │ ├── ray_trainer.py │ ├── shell │ │ ├── dapo_7b_math_fsdp2_4_12.sh │ │ ├── dapo_7b_math_fsdp2_64_64.sh │ │ ├── dapo_7b_math_fsdp2_64_64_ris.sh │ │ ├── dapo_7b_math_fsdp2_colocate.sh │ │ ├── dapo_7b_math_fsdp2_sglang_4_12.sh │ │ ├── dapo_7b_math_fsdp2_sglang_colocate.sh │ │ ├── dapo_7b_math_megatron_4_12.sh │ │ ├── dapo_7b_math_megatron_colocate.sh │ │ ├── grpo_0.6b_gsm8k_fsdp2_2_6.sh │ │ ├── grpo_0.6b_gsm8k_fsdp2_sglang_2_6.sh │ │ ├── grpo_3b_gsm8k_fsdp2_2_6.sh │ │ └── grpo_qwen3_8b_gsm8k_fsdp2_8_8_npu.sh │ └── utils.py ├── open_math_reasoning │ ├── README.md │ ├── compute_score.py │ ├── prepare_eval_dataset.py │ ├── prepare_nvidia-OpenMathReasoning_sft.py │ ├── run_eval.sh │ ├── run_generation.sh │ └── run_sft_qwen3_8b.sh ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ ├── run_prime_qwen.sh │ └── run_prime_qwen_code.sh ├── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ │ ├── __init__.py │ │ ├── gpqa.py │ │ ├── livecodebench.py │ │ └── math_reward.py ├── r1_ascend │ ├── Dockerfile.vllm_ascend.mindspeed.deepseekV3 │ ├── README.md │ ├── README_zh.md │ ├── deepscaler.py │ ├── engine_core.py │ ├── figures │ │ ├── response_len.png │ │ ├── rewards.png │ │ └── val_score.png │ ├── json_to_parquet.py │ ├── main_ppo.py │ ├── megatron_workers.py │ ├── ray_start_grpo_npu.sh │ ├── run_deepseekv3_671b_grpo_megatron_npu.sh │ ├── vllm_parallel_state.py │ └── vllm_rollout_spmd.py ├── spin │ ├── README.md │ ├── config │ │ └── spin_trainer.yaml │ ├── core_algos.py │ ├── dp_actor.py │ ├── fsdp_workers.py │ ├── main_spin.py │ ├── run_spin.sh │ ├── spin_trainer.py │ └── utils.py ├── sppo │ ├── README.md │ ├── __init__.py │ ├── config.py │ ├── config │ │ └── sppo_trainer.yaml │ ├── dp_actor.py │ ├── main_sppo.py │ ├── run_qwen2.5-7b_rm.sh │ ├── sppo_ray_trainer.py │ └── sppo_worker.py ├── transfer_queue │ ├── agent_loop.py │ ├── config │ │ ├── transfer_queue_ppo_megatron_trainer.yaml │ │ └── transfer_queue_ppo_trainer.yaml │ ├── main_ppo.py │ ├── ray_trainer.py │ └── run_qwen3-8b_transferqueue.sh └── vla │ ├── README.md │ ├── config │ └── rob_ppo_trainer.yaml │ ├── dp_rob.py │ ├── env_loop.py │ ├── envs │ ├── __init__.py │ ├── action_utils.py │ ├── isaac_env │ │ ├── __init__.py │ │ └── isaac_env.py │ └── libero_env │ │ ├── __init__.py │ │ ├── libero_env.py │ │ ├── utils.py │ │ └── venv.py │ ├── fsdp_workers.py │ ├── main_ppo.py │ ├── models │ └── openvla_oft │ │ ├── __init__.py │ │ ├── configuration_prismatic.py │ │ ├── constants.py │ │ ├── modeling_prismatic.py │ │ ├── processing_prismatic.py │ │ └── train_utils.py │ ├── naive_rollout_rob.py │ ├── prepare_libero_dataset.py │ ├── requirements_vla.txt │ ├── rob_ray_trainer.py │ ├── run_simpleVLA_isaac_disagg.sh │ ├── run_simpleVLA_libero_grpo.sh │ └── workers │ └── env │ ├── env_loop_wg_test.py │ ├── env_manager.py │ └── env_worker.py ├── requirements-cuda.txt ├── requirements-npu.txt ├── requirements.txt ├── requirements_sglang.txt ├── requirements_transferqueue.txt ├── scripts ├── __init__.py ├── converter_hf_to_mcore.py ├── diagnose.py ├── generate_trainer_config.sh ├── init_random_model.py ├── install_vllm_sglang_mcore.sh ├── legacy_model_merger.py ├── print_cfg.py └── rollout_viewer.py ├── setup.py ├── tests ├── README.md ├── __init__.py ├── experimental │ ├── agent_loop │ │ ├── agent_utils.py │ │ ├── qwen_vl_tool_chat_template.jinja2 │ │ ├── test_agent_loop_reward.py │ │ ├── test_agent_loop_reward_model.py │ │ ├── test_basic_agent_loop.py │ │ ├── test_gpt_oss_tool_parser.py │ │ ├── test_multi_modal.py │ │ └── test_standalone_rollout.py │ ├── reward │ │ ├── reward_fn.py │ │ ├── test_agent_loop_reward_manager.py │ │ ├── test_agent_reward_loop_colocate.py │ │ ├── test_async_token_bucket_on_cpu.py │ │ ├── test_rate_limited_reward_manager_on_cpu.py │ │ ├── test_reward_model_disrm.py │ │ └── test_reward_model_genrm.py │ └── vla │ │ └── test_sim_envs.py ├── interactions │ ├── __init__.py │ ├── test_gsm8k_interaction.py │ └── test_interaction_registry.py ├── kill_github_tests.sh ├── models │ ├── test_engine.py │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── single_controller │ ├── __init__.py │ ├── base │ │ └── test_decorator.py │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_auto_padding_on_cpu.py │ ├── test_colocated_workers.py │ ├── test_colocated_workers_fused.py │ ├── test_data_transfer.py │ ├── test_decorator_on_cpu.py │ ├── test_device_mesh_register.py │ ├── test_driverfunc_to_worker.py │ ├── test_fused_workers_on_cpu.py │ ├── test_get_set_dispatch_collect_cpu.py │ ├── test_high_level_scheduling_api.py │ ├── test_nested_worker.py │ ├── test_ray_collectives.py │ ├── test_ray_local_envs_on_cpu.py │ ├── test_ray_utils_on_cpu.py │ ├── test_rvdz.py │ ├── test_split_resource_pool.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── special_distributed │ ├── README.md │ ├── run_all.sh │ ├── test_fsdp_ckpt.py │ ├── test_mcore_config_converter.py │ ├── test_tensor_dict.py │ └── test_torch_functional.py ├── special_e2e │ ├── README.md │ ├── __init__.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── generation │ │ ├── run_gen_qwen05.sh │ │ └── run_gen_qwen05_server.sh │ ├── ppo_trainer │ │ ├── expert_parallel │ │ │ ├── qwen2moe_minimal.json │ │ │ └── qwen3moe_minimal.json │ │ ├── run_function_reward.sh │ │ ├── run_model_reward.sh │ │ ├── run_single_gpu.sh │ │ └── run_single_gpu_with_engine.sh │ ├── run_dapo.sh │ ├── run_fully_async_policy.sh │ ├── run_genrm_remote.sh │ ├── run_geo3k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_grpo_lora_with_merge.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_one_step_off_policy.sh │ ├── run_ppo_trainer_megatron.sh │ ├── run_prime.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_spin.sh │ ├── run_sppo.sh │ ├── run_test.sh │ ├── run_transferqueue.sh │ └── sft │ │ ├── compare_sft_engine_results.py │ │ ├── run_sft.sh │ │ ├── run_sft_engine_gsm8k.sh │ │ ├── test_sft_engine_all.sh │ │ └── test_sp_loss_match.py ├── special_npu │ ├── run_qwen2_5_05b_dapo.sh │ ├── run_qwen2_5_05b_grpo.sh │ ├── run_qwen2_5_05b_grpo_mindspeed.sh │ ├── run_qwen2_5_05b_sft_peft_sp2.sh │ ├── run_qwen2_5_vl_3b_npu.sh │ ├── run_qwen3_06b_ppo.sh │ └── run_qwen3_30b_dapo_mindspeed.sh ├── special_sanity │ ├── check_api_docs.py │ ├── check_dataproto_usage.py │ ├── check_device_api_usage.py │ ├── check_docs_time_info.py │ ├── check_docstrings.py │ ├── check_license.py │ ├── check_pr_description.py │ ├── check_pr_title.py │ ├── test_config_docs.py │ ├── test_import.py │ ├── type_coverage_check.py │ ├── validate_imported_docs.py │ └── validate_structure.py ├── special_standalone │ ├── README.md │ └── test_memory_buffers.py ├── test_base_config_on_cpu.py ├── test_protocol_on_cpu.py ├── test_protocol_v2_on_cpu.py ├── trainer │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── legacy_ppo_megatron_trainer.yaml │ │ ├── legacy_ppo_trainer.yaml │ │ ├── test_algo_config_on_cpu.py │ │ └── test_legacy_config_on_cpu.py │ └── ppo │ │ ├── __init__.py │ │ ├── test_core_algos_on_cpu.py │ │ ├── test_metric_utils_on_cpu.py │ │ ├── test_rollout_corr.py │ │ └── test_rollout_corr_integration.py ├── utils │ ├── _test_module.py │ ├── ckpt │ │ └── test_esi_save_ckpt_on_cpu.py │ ├── dataset │ │ ├── test_create_rl_sampler_on_cpu.py │ │ ├── test_multiturn_sft_dataset_on_cpu.py │ │ ├── test_rl_collate_fn_on_cpu.py │ │ ├── test_rl_dataset_on_cpu.py │ │ └── test_sft_dataset_on_cpu.py │ ├── debug │ │ └── test_metrics.py │ ├── megatron │ │ └── test_pipeline_parallel.py │ ├── reward_score │ │ ├── reward_score │ │ │ └── test_sandbox_fusion_on_cpu.py │ │ └── test_sandbox_on_cpu.py │ ├── test_activation_offload.py │ ├── test_config_on_cpu.py │ ├── test_flops_counter.py │ ├── test_fs_on_cpu.py │ ├── test_groupwise.py │ ├── test_import_utils_on_cpu.py │ ├── test_linear_cross_entropy.py │ ├── test_mlflow_key_sanitization.py │ ├── test_model_on_cpu.py │ ├── test_nvtx_profile.py │ ├── test_rollout_skip_on_cpu.py │ ├── test_rollout_trace_on_cpu.py │ ├── test_seqlen_balancing.py │ ├── test_special_linear_cross_entropy_tp.py │ ├── test_special_mstx_profile.py │ ├── test_temp_env_on_cpu.py │ ├── test_timeout_decorator_cpu.py │ └── test_torch_functional.py └── workers │ ├── actor │ └── test_special_dp_actor.py │ ├── config │ ├── test_actor_config_on_cpu.py │ ├── test_critic_config_on_cpu.py │ ├── test_engine_config_on_cpu.py │ └── test_optim_config_on_cpu.py │ ├── critic │ └── test_special_dp_critic.py │ ├── reward_manager │ └── test_registry_on_cpu.py │ ├── rollout │ ├── perf │ │ └── vllm_async_rollout.py │ ├── resource │ │ └── tool_configs │ │ │ ├── mcp_server.json │ │ │ ├── mcp_tool_config │ │ │ ├── sandbox_fusion_tool_config │ │ │ └── search_tool_config │ ├── rollout_sglang │ │ └── test_http_server_engine.py │ ├── rollout_vllm │ │ └── run_fsdp_vllm.py │ ├── test_hf_rollout.py │ ├── test_sglang_async_rollout_multimodal_delta.py │ └── test_sglang_rollout_sharding_manager.py │ ├── test_fsdp_attn_implementation.py │ └── test_fsdp_workers.py └── verl ├── __init__.py ├── base_config.py ├── experimental ├── __init__.py ├── agent_loop │ ├── __init__.py │ ├── agent_loop.py │ ├── prometheus_utils.py │ ├── single_turn_agent_loop.py │ ├── tool_agent_loop.py │ ├── tool_parser.py │ └── utils.py ├── dataset │ ├── __init__.py │ └── sampler.py ├── dynamic_dataset │ ├── __init__.py │ └── dynamicgen_dataset.py └── reward │ ├── __init__.py │ ├── reward_loop │ ├── __init__.py │ ├── base.py │ ├── dapo.py │ ├── limited.py │ ├── naive.py │ └── registry.py │ ├── reward_manager.py │ ├── reward_model.py │ └── router │ ├── inner_sglang_router.py │ └── naive_router.py ├── interactions ├── __init__.py ├── base.py ├── gsm8k_interaction.py ├── utils │ ├── __init__.py │ └── interaction_registry.py └── weather_interaction.py ├── model_merger ├── __init__.py ├── __main__.py ├── base_model_merger.py ├── fsdp_model_merger.py └── megatron_model_merger.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ ├── llama_loader_depracated.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── mcore │ ├── __init__.py │ ├── bridge.py │ ├── config_converter.py │ ├── loader.py │ ├── mbridge.py │ ├── model_forward.py │ ├── model_forward_1f1b_overlap.py │ ├── model_forward_fused.py │ ├── model_initializer.py │ ├── patch_v012.py │ ├── qwen2_5_vl │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── model.py │ │ ├── rope_utils.py │ │ ├── vision_config.py │ │ ├── vision_model.py │ │ └── vision_transformer_block.py │ ├── readme.md │ ├── registry.py │ ├── saver.py │ ├── util.py │ └── weight_converter.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ ├── qwen2_loader_depracated.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── apertus.py │ ├── dense_common.py │ ├── glm4v.py │ ├── kimi_vl.py │ ├── llama.py │ ├── monkey_patch.py │ ├── npu_patch.py │ ├── qwen2.py │ ├── qwen2_vl.py │ └── qwen3_vl.py └── weight_loader_registry.py ├── protocol.py ├── py.typed ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ └── base.py ├── third_party ├── __init__.py ├── sglang │ ├── __init__.py │ └── parallel_state.py ├── torch │ ├── __init__.py │ └── distributed │ │ ├── __init__.py │ │ ├── _state_dict_utils.py │ │ └── checkpoint │ │ ├── __init__.py │ │ └── state_dict.py └── vllm │ └── __init__.py ├── tools ├── __init__.py ├── base_tool.py ├── geo3k_tool.py ├── gsm8k_tool.py ├── image_zoom_in_tool.py ├── mcp_base_tool.py ├── mcp_search_tool.py ├── sandbox_fusion_tools.py ├── schemas.py ├── search_tool.py └── utils │ ├── __init__.py │ ├── mcp_clients │ ├── McpClientManager.py │ └── utils.py │ ├── search_r1_like_utils.py │ └── tool_registry.py ├── trainer ├── __init__.py ├── config │ ├── __init__.py │ ├── _generated_ppo_megatron_trainer.yaml │ ├── _generated_ppo_trainer.yaml │ ├── actor │ │ ├── actor.yaml │ │ ├── dp_actor.yaml │ │ └── megatron_actor.yaml │ ├── algorithm.py │ ├── algorithm │ │ └── rollout_correction.yaml │ ├── config.py │ ├── critic │ │ ├── critic.yaml │ │ ├── dp_critic.yaml │ │ └── megatron_critic.yaml │ ├── data │ │ └── legacy_data.yaml │ ├── engine │ │ ├── fsdp.yaml │ │ └── megatron.yaml │ ├── evaluation.yaml │ ├── generation.yaml │ ├── model │ │ └── hf_model.yaml │ ├── npu_profile │ │ └── npu_profile.yaml │ ├── optim │ │ ├── fsdp.yaml │ │ └── megatron.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ ├── ref │ │ ├── dp_ref.yaml │ │ ├── megatron_ref.yaml │ │ └── ref.yaml │ ├── reward_manager.yaml │ ├── reward_model │ │ ├── dp_reward_model.yaml │ │ ├── megatron_reward_model.yaml │ │ └── reward_model.yaml │ ├── rollout │ │ └── rollout.yaml │ ├── sft_trainer.yaml │ └── sft_trainer_engine.yaml ├── constants_ppo.py ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_generation_server.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ ├── metric_utils.py │ ├── ray_trainer.py │ ├── reward.py │ ├── rollout_corr_helper.py │ └── utils.py ├── runtime_env.yaml ├── sft_trainer.py └── sft_trainer_ray.py ├── utils ├── __init__.py ├── activation_offload.py ├── attention_utils.py ├── chat_template.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_handler.py │ ├── checkpoint_manager.py │ ├── fsdp_checkpoint_manager.py │ └── megatron_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── dataset_utils.py │ ├── multiturn_sft_dataset.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ ├── sft_dataset.py │ └── vision_utils.py ├── debug │ ├── __init__.py │ ├── metrics.py │ ├── performance.py │ └── trajectory_tracker.py ├── device.py ├── distributed.py ├── experimental │ ├── __init__.py │ └── torch_functional.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── groupwise.py ├── hdfs_io.py ├── import_utils.py ├── kernel │ ├── __init__.py │ ├── kernels.py │ └── linear_cross_entropy.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── dist_checkpointing.py │ ├── memory.py │ ├── optimizer.py │ ├── pipeline_parallel.py │ ├── router_replay_patch.py │ ├── router_replay_utils.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_peft_utils.py ├── megatron_utils.py ├── memory_buffer.py ├── memory_utils.py ├── metric │ ├── __init__.py │ └── utils.py ├── model.py ├── net_utils.py ├── npu_flash_attn_utils.py ├── profiler │ ├── __init__.py │ ├── config.py │ ├── empty_annotations.py │ ├── mstx_profile.py │ ├── nvtx_profile.py │ ├── performance.py │ └── profile.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── math_batch.py │ ├── math_dapo.py │ ├── math_reward.py │ ├── math_verify.py │ ├── prime_code │ │ ├── README.md │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ ├── prime_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py │ ├── sandbox_fusion │ │ ├── __init__.py │ │ └── utils.py │ └── search_r1_like_qa_em.py ├── rollout_skip.py ├── rollout_trace.py ├── seqlen_balancing.py ├── tensordict_utils.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py ├── transferqueue_utils.py ├── transformers_compat.py ├── ulysses.py └── vllm │ ├── __init__.py │ ├── patch.py │ ├── utils.py │ └── vllm_fp8_utils.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── config ├── __init__.py ├── actor.py ├── critic.py ├── engine.py ├── megatron_peft.py ├── model.py ├── optimizer.py ├── reward_model.py └── rollout.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── engine ├── __init__.py ├── base.py ├── fsdp │ ├── __init__.py │ ├── transformer_impl.py │ └── utils.py ├── megatron │ ├── __init__.py │ ├── transformer_impl.py │ └── utils.py ├── mindspeed │ ├── __init__.py │ └── transformer_impl.py └── utils.py ├── engine_workers.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── abstract.py ├── batch.py ├── dapo.py ├── naive.py ├── prime.py └── registry.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── replica.py ├── schemas.py ├── sglang_rollout │ ├── __init__.py │ ├── async_sglang_server.py │ ├── http_server_engine.py │ ├── sglang_rollout.py │ └── utils.py ├── tokenizer.py ├── utils.py └── vllm_rollout │ ├── __init__.py │ ├── utils.py │ ├── vllm_async_server.py │ └── vllm_rollout.py ├── sharding_manager ├── __init__.py ├── base.py └── fsdp_ulysses.py └── utils ├── __init__.py ├── losses.py └── padding.py /.gemini/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.gemini/config.yaml -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/ISSUE_TEMPLATE/bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 0.1 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/ISSUE_TEMPLATE/feature-request.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/.deprecate/e2e_eval_aime24.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/.deprecate/e2e_eval_aime24.yml -------------------------------------------------------------------------------- /.github/workflows/.deprecate/e2e_ppo_trainer.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/.deprecate/e2e_ppo_trainer.yml -------------------------------------------------------------------------------- /.github/workflows/.deprecate/e2e_prime.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/.deprecate/e2e_prime.yml -------------------------------------------------------------------------------- /.github/workflows/.deprecate/e2e_spin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/.deprecate/e2e_spin.yml -------------------------------------------------------------------------------- /.github/workflows/.deprecate/e2e_sppo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/.deprecate/e2e_sppo.yml -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/README.md -------------------------------------------------------------------------------- /.github/workflows/check-pr-title.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/check-pr-title.yml -------------------------------------------------------------------------------- /.github/workflows/checkpoint_converter.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/checkpoint_converter.yml -------------------------------------------------------------------------------- /.github/workflows/cpu_unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/cpu_unit_tests.yml -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/doc.yml -------------------------------------------------------------------------------- /.github/workflows/docker-build-ascend-a2.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/docker-build-ascend-a2.yml -------------------------------------------------------------------------------- /.github/workflows/docker-build-ascend-a3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/docker-build-ascend-a3.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_ascend.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_ascend.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_dapo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_dapo.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_fully_async_policy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_fully_async_policy.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_genrm_remote.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_genrm_remote.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_one_step_off_policy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_one_step_off_policy.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_ppo_trainer.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_ppo_trainer.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_sft.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_sft.yml -------------------------------------------------------------------------------- /.github/workflows/e2e_transferqueue.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/e2e_transferqueue.yml -------------------------------------------------------------------------------- /.github/workflows/gpu_unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/gpu_unit_tests.yml -------------------------------------------------------------------------------- /.github/workflows/model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/model.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/reward_model_sglang.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/reward_model_sglang.yml -------------------------------------------------------------------------------- /.github/workflows/reward_model_vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/reward_model_vllm.yml -------------------------------------------------------------------------------- /.github/workflows/sanity.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/sanity.yml -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/scorecard.yml -------------------------------------------------------------------------------- /.github/workflows/secrets_scan.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/secrets_scan.yml -------------------------------------------------------------------------------- /.github/workflows/sgl.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/sgl.yml -------------------------------------------------------------------------------- /.github/workflows/type-coverage-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/type-coverage-check.yml -------------------------------------------------------------------------------- /.github/workflows/vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.github/workflows/vllm.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/LICENSE -------------------------------------------------------------------------------- /Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/README.md -------------------------------------------------------------------------------- /docker/Dockerfile.isaaclab230: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/Dockerfile.isaaclab230 -------------------------------------------------------------------------------- /docker/Dockerfile.stable.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/Dockerfile.stable.sglang -------------------------------------------------------------------------------- /docker/Dockerfile.stable.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/Dockerfile.stable.vllm -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/README.md -------------------------------------------------------------------------------- /docker/ascend/Dockerfile.ascend_8.2.rc1_a2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/ascend/Dockerfile.ascend_8.2.rc1_a2 -------------------------------------------------------------------------------- /docker/ascend/Dockerfile.ascend_8.2.rc1_a3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/ascend/Dockerfile.ascend_8.2.rc1_a3 -------------------------------------------------------------------------------- /docker/ascend/Dockerfile.ascend_8.3.rc1_a2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/ascend/Dockerfile.ascend_8.3.rc1_a2 -------------------------------------------------------------------------------- /docker/ascend/Dockerfile.ascend_8.3.rc1_a3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/ascend/Dockerfile.ascend_8.3.rc1_a3 -------------------------------------------------------------------------------- /docker/aws/Dockerfile.extention.awsefa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/aws/Dockerfile.extention.awsefa -------------------------------------------------------------------------------- /docker/aws/Dockerfile.ngc.vllm0.8.sagemaker: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/aws/Dockerfile.ngc.vllm0.8.sagemaker -------------------------------------------------------------------------------- /docker/rocm/Apptainerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/rocm/Apptainerfile.rocm -------------------------------------------------------------------------------- /docker/rocm/Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/rocm/Dockerfile.rocm -------------------------------------------------------------------------------- /docker/rocm/Dockerfile.rocm7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/rocm/Dockerfile.rocm7 -------------------------------------------------------------------------------- /docker/rocm/Dockerfile.rocm_verl-0.3.0.post1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/rocm/Dockerfile.rocm_verl-0.3.0.post1 -------------------------------------------------------------------------------- /docker/rocm/Dockerfile.rocm_verl-0.4.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/rocm/Dockerfile.rocm_verl-0.4.1 -------------------------------------------------------------------------------- /docker/verl0.4-cu124-torch2.6-fa2.7.4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/verl0.4-cu124-torch2.6-fa2.7.4/README.md -------------------------------------------------------------------------------- /docker/verl0.5-cu126-torch2.7-fa2.7.4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docker/verl0.5-cu126-torch2.7-fa2.7.4/README.md -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/README_vllm0.7.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/README_vllm0.7.md -------------------------------------------------------------------------------- /docs/README_vllm0.8.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/README_vllm0.8.md -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/_static/custom.css -------------------------------------------------------------------------------- /docs/_static/js/resizable-sidebar.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/_static/js/resizable-sidebar.js -------------------------------------------------------------------------------- /docs/_static/js/runllm-widget.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/_static/js/runllm-widget.js -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/agent_loop.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/agent_loop.rst -------------------------------------------------------------------------------- /docs/advance/async-on-policy-distill.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/async-on-policy-distill.md -------------------------------------------------------------------------------- /docs/advance/attention_implementation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/attention_implementation.rst -------------------------------------------------------------------------------- /docs/advance/checkpoint.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/checkpoint.rst -------------------------------------------------------------------------------- /docs/advance/dpo_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/dpo_extension.rst -------------------------------------------------------------------------------- /docs/advance/fp8.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/fp8.md -------------------------------------------------------------------------------- /docs/advance/fsdp_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/fsdp_extension.rst -------------------------------------------------------------------------------- /docs/advance/fully_async.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/fully_async.md -------------------------------------------------------------------------------- /docs/advance/grafana_prometheus.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/grafana_prometheus.md -------------------------------------------------------------------------------- /docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/megatron_extension.rst -------------------------------------------------------------------------------- /docs/advance/one_step_off.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/one_step_off.md -------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/placement.rst -------------------------------------------------------------------------------- /docs/advance/ppo_lora.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/ppo_lora.rst -------------------------------------------------------------------------------- /docs/advance/reward_loop.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/reward_loop.rst -------------------------------------------------------------------------------- /docs/advance/rollout_skip.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/rollout_skip.rst -------------------------------------------------------------------------------- /docs/advance/rollout_trace.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/rollout_trace.rst -------------------------------------------------------------------------------- /docs/advance/rope.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/advance/rope.rst -------------------------------------------------------------------------------- /docs/algo/baseline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/baseline.md -------------------------------------------------------------------------------- /docs/algo/collabllm.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/collabllm.md -------------------------------------------------------------------------------- /docs/algo/dapo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/dapo.md -------------------------------------------------------------------------------- /docs/algo/entropy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/entropy.md -------------------------------------------------------------------------------- /docs/algo/gpg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/gpg.md -------------------------------------------------------------------------------- /docs/algo/grpo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/grpo.md -------------------------------------------------------------------------------- /docs/algo/opo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/opo.md -------------------------------------------------------------------------------- /docs/algo/ppo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/ppo.md -------------------------------------------------------------------------------- /docs/algo/rollout_corr.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/rollout_corr.md -------------------------------------------------------------------------------- /docs/algo/rollout_corr_math.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/rollout_corr_math.md -------------------------------------------------------------------------------- /docs/algo/spin.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/spin.md -------------------------------------------------------------------------------- /docs/algo/sppo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/algo/sppo.md -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_build_dockerfile_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/amd_tutorial/amd_build_dockerfile_page.rst -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_vllm_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/amd_tutorial/amd_vllm_page.rst -------------------------------------------------------------------------------- /docs/api/data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/api/data.rst -------------------------------------------------------------------------------- /docs/api/single_controller.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/api/single_controller.rst -------------------------------------------------------------------------------- /docs/api/trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/api/trainer.rst -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/api/utils.rst -------------------------------------------------------------------------------- /docs/ascend_tutorial/ascend_consistency.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/ascend_tutorial/ascend_consistency.rst -------------------------------------------------------------------------------- /docs/ascend_tutorial/ascend_profiling_en.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/ascend_tutorial/ascend_profiling_en.rst -------------------------------------------------------------------------------- /docs/ascend_tutorial/ascend_profiling_zh.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/ascend_tutorial/ascend_profiling_zh.rst -------------------------------------------------------------------------------- /docs/ascend_tutorial/ascend_quick_start.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/ascend_tutorial/ascend_quick_start.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/data/transfer_queue.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/data/transfer_queue.md -------------------------------------------------------------------------------- /docs/examples/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/config.rst -------------------------------------------------------------------------------- /docs/examples/gsm8k_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/gsm8k_example.rst -------------------------------------------------------------------------------- /docs/examples/multi_modal_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/multi_modal_example.rst -------------------------------------------------------------------------------- /docs/examples/ppo_code_architecture.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/ppo_code_architecture.rst -------------------------------------------------------------------------------- /docs/examples/sandbox_fusion_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/sandbox_fusion_example.rst -------------------------------------------------------------------------------- /docs/examples/skypilot_examples.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/examples/skypilot_examples.rst -------------------------------------------------------------------------------- /docs/faq/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/faq/faq.rst -------------------------------------------------------------------------------- /docs/hybrid_flow.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/hybrid_flow.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/perf/best_practices.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/best_practices.rst -------------------------------------------------------------------------------- /docs/perf/device_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/device_tuning.rst -------------------------------------------------------------------------------- /docs/perf/dpsk.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/dpsk.md -------------------------------------------------------------------------------- /docs/perf/nsight_profiling.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/nsight_profiling.md -------------------------------------------------------------------------------- /docs/perf/perf_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/perf_tuning.rst -------------------------------------------------------------------------------- /docs/perf/verl_profiler_system.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/perf/verl_profiler_system.md -------------------------------------------------------------------------------- /docs/preparation/prepare_data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/preparation/prepare_data.rst -------------------------------------------------------------------------------- /docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/preparation/reward_function.rst -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/sglang_multiturn/interaction_system.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/sglang_multiturn/interaction_system.rst -------------------------------------------------------------------------------- /docs/sglang_multiturn/multiturn.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/sglang_multiturn/multiturn.rst -------------------------------------------------------------------------------- /docs/sglang_multiturn/sandbox_fusion.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/sglang_multiturn/sandbox_fusion.rst -------------------------------------------------------------------------------- /docs/sglang_multiturn/search_tool_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/sglang_multiturn/search_tool_example.rst -------------------------------------------------------------------------------- /docs/single_controller.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/single_controller.rst -------------------------------------------------------------------------------- /docs/start/agentic_rl.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/agentic_rl.rst -------------------------------------------------------------------------------- /docs/start/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/install.rst -------------------------------------------------------------------------------- /docs/start/more_resources.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/more_resources.rst -------------------------------------------------------------------------------- /docs/start/multinode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/multinode.rst -------------------------------------------------------------------------------- /docs/start/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/quickstart.rst -------------------------------------------------------------------------------- /docs/start/ray_debug_tutorial.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/start/ray_debug_tutorial.rst -------------------------------------------------------------------------------- /docs/workers/fsdp_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/workers/fsdp_workers.rst -------------------------------------------------------------------------------- /docs/workers/megatron_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/workers/megatron_workers.rst -------------------------------------------------------------------------------- /docs/workers/model_engine.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/workers/model_engine.rst -------------------------------------------------------------------------------- /docs/workers/ray_trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/workers/ray_trainer.rst -------------------------------------------------------------------------------- /docs/workers/sglang_worker.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/docs/workers/sglang_worker.rst -------------------------------------------------------------------------------- /examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k_multiturn_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/gsm8k_multiturn_sft.py -------------------------------------------------------------------------------- /examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/generation/run_deepseek7b_mutli_node.sh -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /examples/gmpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gmpo_trainer/README.md -------------------------------------------------------------------------------- /examples/gmpo_trainer/run_qwen2_5-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gmpo_trainer/run_qwen2_5-7b_math.sh -------------------------------------------------------------------------------- /examples/gmpo_trainer/test_dapo_7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gmpo_trainer/test_dapo_7b_math.sh -------------------------------------------------------------------------------- /examples/gpg_trainer/gpg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gpg_trainer/gpg.md -------------------------------------------------------------------------------- /examples/gpg_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gpg_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/README.md -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_deepseek7b_llm_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_glm41v_9b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_glm41v_9b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_gptoss_20b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_gptoss_20b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_minicpmo2_6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_minicpmo2_6.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_vl-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-32b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen3-32b_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen3-8b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-8b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_qwen3-8b_npu.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_seed_oss_36b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/grpo_trainer/run_seed_oss_36b.sh -------------------------------------------------------------------------------- /examples/gspo_trainer/run_qwen30b_gspo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gspo_trainer/run_qwen30b_gspo.sh -------------------------------------------------------------------------------- /examples/gspo_trainer/test_gspo_3b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gspo_trainer/test_gspo_3b_math.sh -------------------------------------------------------------------------------- /examples/gspo_trainer/test_gspo_3b_math_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/gspo_trainer/test_gspo_3b_math_slurm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/README.md -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen3-8b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ppo_trainer/run_qwen3-8b_npu.sh -------------------------------------------------------------------------------- /examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/rloo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/router_replay/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/router_replay/README.md -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_deepseek_6b7.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_gemma_2b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_gemma_7b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_qwen_05_peft.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_seed_oss_36b_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/gsm8k/run_seed_oss_36b_sft.sh -------------------------------------------------------------------------------- /examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sft/multiturn/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/sglang_multiturn/README.md -------------------------------------------------------------------------------- /examples/skypilot/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/skypilot/README.md -------------------------------------------------------------------------------- /examples/skypilot/verl-grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/skypilot/verl-grpo.yaml -------------------------------------------------------------------------------- /examples/skypilot/verl-multiturn-tools.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/skypilot/verl-multiturn-tools.yaml -------------------------------------------------------------------------------- /examples/skypilot/verl-ppo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/skypilot/verl-ppo.yaml -------------------------------------------------------------------------------- /examples/slurm/ray_on_slurm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/slurm/ray_on_slurm.slurm -------------------------------------------------------------------------------- /examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/split_placement/README.md -------------------------------------------------------------------------------- /examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/pyproject.toml -------------------------------------------------------------------------------- /recipe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/README.md -------------------------------------------------------------------------------- /recipe/collabllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/README.md -------------------------------------------------------------------------------- /recipe/collabllm/collabllm_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/collabllm_agent_loop.py -------------------------------------------------------------------------------- /recipe/collabllm/collabllm_interation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/collabllm_interation.py -------------------------------------------------------------------------------- /recipe/collabllm/config/agent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/config/agent.yaml -------------------------------------------------------------------------------- /recipe/collabllm/metrics/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/metrics/accuracy.py -------------------------------------------------------------------------------- /recipe/collabllm/metrics/bleu_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/metrics/bleu_score.py -------------------------------------------------------------------------------- /recipe/collabllm/metrics/interactivity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/metrics/interactivity.py -------------------------------------------------------------------------------- /recipe/collabllm/metrics/pass_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/metrics/pass_rate.py -------------------------------------------------------------------------------- /recipe/collabllm/metrics/token_amount.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/metrics/token_amount.py -------------------------------------------------------------------------------- /recipe/collabllm/process_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/process_dataset.py -------------------------------------------------------------------------------- /recipe/collabllm/reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/reward_function.py -------------------------------------------------------------------------------- /recipe/collabllm/train_rl_collabllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/train_rl_collabllm.sh -------------------------------------------------------------------------------- /recipe/collabllm/train_sft_collabllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/train_sft_collabllm.sh -------------------------------------------------------------------------------- /recipe/collabllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/collabllm/utils.py -------------------------------------------------------------------------------- /recipe/dapo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/README.md -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/config/dapo_megatron_trainer.yaml -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/config/dapo_trainer.yaml -------------------------------------------------------------------------------- /recipe/dapo/dapo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/dapo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/dapo/main_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/main_dapo.py -------------------------------------------------------------------------------- /recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/prepare_dapo_data.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_early_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_early_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_32b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen2.5_32b_npu.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_32b_rollout_corr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen2.5_32b_rollout_corr.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_7b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen2.5_7b_npu.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen3_14b_base_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen3_14b_base_npu.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen3_8b_base_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_qwen3_8b_base_npu.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/runtime_env.yaml -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_7b.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_7b_math.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b_math_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_7b_math_lora.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_7b_math_megatron.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_8b_megatron_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_8b_megatron_fp16.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_8b_megatron_fp8train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_8b_megatron_fp8train.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_dspk_671b_megatron_96gb.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_dspk_671b_megatron_96gb.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_glm_air_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_glm_air_megatron.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_gptoss_20b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_gptoss_20b_megatron.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_qwen3_30b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/dapo/test_dapo_qwen3_30b_math.sh -------------------------------------------------------------------------------- /recipe/deepeyes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/deepeyes/README.md -------------------------------------------------------------------------------- /recipe/deepeyes/deepeyes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/deepeyes/deepeyes.py -------------------------------------------------------------------------------- /recipe/deepeyes/run_deepeyes_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/deepeyes/run_deepeyes_grpo.sh -------------------------------------------------------------------------------- /recipe/entropy/32b_clip_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/32b_clip_cov.sh -------------------------------------------------------------------------------- /recipe/entropy/32b_kl_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/32b_kl_cov.sh -------------------------------------------------------------------------------- /recipe/entropy/32b_kl_cov_mininbsz.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/32b_kl_cov_mininbsz.sh -------------------------------------------------------------------------------- /recipe/entropy/7b_clip_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/7b_clip_cov.sh -------------------------------------------------------------------------------- /recipe/entropy/7b_kl_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/7b_kl_cov.sh -------------------------------------------------------------------------------- /recipe/entropy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/README.md -------------------------------------------------------------------------------- /recipe/entropy/config/entropy_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/config/entropy_trainer.yaml -------------------------------------------------------------------------------- /recipe/entropy/entropy_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/entropy_ray_trainer.py -------------------------------------------------------------------------------- /recipe/entropy/main_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/main_entropy.py -------------------------------------------------------------------------------- /recipe/entropy/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/reward.py -------------------------------------------------------------------------------- /recipe/entropy/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/entropy/reward_score/__init__.py -------------------------------------------------------------------------------- /recipe/fapo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/README.md -------------------------------------------------------------------------------- /recipe/fapo/config/rm_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/config/rm_config.yaml -------------------------------------------------------------------------------- /recipe/fapo/prepare_fapo_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/prepare_fapo_data.py -------------------------------------------------------------------------------- /recipe/fapo/reward_fn_genrm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/reward_fn_genrm.py -------------------------------------------------------------------------------- /recipe/fapo/reward_fn_reasoning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/reward_fn_reasoning.py -------------------------------------------------------------------------------- /recipe/fapo/reward_fn_reasoning_remote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/reward_fn_reasoning_remote.py -------------------------------------------------------------------------------- /recipe/fapo/run_baseline_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_baseline_32b.sh -------------------------------------------------------------------------------- /recipe/fapo/run_baseline_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_baseline_7b.sh -------------------------------------------------------------------------------- /recipe/fapo/run_fapo_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_fapo_32b.sh -------------------------------------------------------------------------------- /recipe/fapo/run_fapo_32b_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_fapo_32b_remote.sh -------------------------------------------------------------------------------- /recipe/fapo/run_fapo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_fapo_7b.sh -------------------------------------------------------------------------------- /recipe/fapo/run_fapo_7b_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_fapo_7b_remote.sh -------------------------------------------------------------------------------- /recipe/fapo/run_fapo_genrm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/run_fapo_genrm_train.sh -------------------------------------------------------------------------------- /recipe/fapo/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fapo/runtime_env.yaml -------------------------------------------------------------------------------- /recipe/flowrl/FLOWRL_SIMPLE_GUIDE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/FLOWRL_SIMPLE_GUIDE.md -------------------------------------------------------------------------------- /recipe/flowrl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/README.md -------------------------------------------------------------------------------- /recipe/flowrl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/__init__.py -------------------------------------------------------------------------------- /recipe/flowrl/config/flowrl_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/config/flowrl_trainer.yaml -------------------------------------------------------------------------------- /recipe/flowrl/figures/file.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/figures/file.svg -------------------------------------------------------------------------------- /recipe/flowrl/figures/flowrl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/figures/flowrl.pdf -------------------------------------------------------------------------------- /recipe/flowrl/figures/flowrl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/figures/flowrl.png -------------------------------------------------------------------------------- /recipe/flowrl/flowrl_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/flowrl_actor.py -------------------------------------------------------------------------------- /recipe/flowrl/flowrl_fsdp_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/flowrl_fsdp_worker.py -------------------------------------------------------------------------------- /recipe/flowrl/flowrl_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/flowrl_ray_trainer.py -------------------------------------------------------------------------------- /recipe/flowrl/main_flowrl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/main_flowrl.py -------------------------------------------------------------------------------- /recipe/flowrl/prepare/prepare_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/prepare/prepare_data.sh -------------------------------------------------------------------------------- /recipe/flowrl/prepare/prepare_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/prepare/prepare_model.sh -------------------------------------------------------------------------------- /recipe/flowrl/run_flowrl_qwen2.5_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/flowrl/run_flowrl_qwen2.5_7b.sh -------------------------------------------------------------------------------- /recipe/fully_async_policy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/README.md -------------------------------------------------------------------------------- /recipe/fully_async_policy/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/README_zh.md -------------------------------------------------------------------------------- /recipe/fully_async_policy/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/agent_loop/__init__.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/detach_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/detach_utils.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/fsdp2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/fsdp2_utils.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/fsdp_workers.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/fully_async_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/fully_async_main.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/fully_async_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/fully_async_trainer.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/megatron_utils.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/megatron_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/megatron_worker.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/message_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/message_queue.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/param_sync.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/param_sync.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/ray_trainer.py -------------------------------------------------------------------------------- /recipe/fully_async_policy/shell/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/fully_async_policy/shell/runtime_env.yaml -------------------------------------------------------------------------------- /recipe/genrm_remote/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/genrm_remote/README.md -------------------------------------------------------------------------------- /recipe/genrm_remote/reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/genrm_remote/reward_function.py -------------------------------------------------------------------------------- /recipe/genrm_remote/run_genrm_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/genrm_remote/run_genrm_remote.sh -------------------------------------------------------------------------------- /recipe/gkd/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/README.md -------------------------------------------------------------------------------- /recipe/gkd/config/on_policy_distill_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/config/on_policy_distill_trainer.yaml -------------------------------------------------------------------------------- /recipe/gkd/config/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/config/runtime_env.yaml -------------------------------------------------------------------------------- /recipe/gkd/main_gkd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/main_gkd.py -------------------------------------------------------------------------------- /recipe/gkd/megatron_kl_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/megatron_kl_loss.py -------------------------------------------------------------------------------- /recipe/gkd/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/megatron_utils.py -------------------------------------------------------------------------------- /recipe/gkd/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/megatron_workers.py -------------------------------------------------------------------------------- /recipe/gkd/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/ray_trainer.py -------------------------------------------------------------------------------- /recipe/gkd/run_moonlight_dsv3_training.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/run_moonlight_dsv3_training.sh -------------------------------------------------------------------------------- /recipe/gkd/teacher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/__init__.py -------------------------------------------------------------------------------- /recipe/gkd/teacher/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/client.py -------------------------------------------------------------------------------- /recipe/gkd/teacher/join_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/join_server.sh -------------------------------------------------------------------------------- /recipe/gkd/teacher/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/proxy.py -------------------------------------------------------------------------------- /recipe/gkd/teacher/start_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/start_server.sh -------------------------------------------------------------------------------- /recipe/gkd/teacher/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/utils.py -------------------------------------------------------------------------------- /recipe/gkd/teacher/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/vllm_engine.py -------------------------------------------------------------------------------- /recipe/gkd/teacher/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher/worker.py -------------------------------------------------------------------------------- /recipe/gkd/teacher_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/teacher_utils.py -------------------------------------------------------------------------------- /recipe/gkd/test_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/test_qwen.sh -------------------------------------------------------------------------------- /recipe/gkd/test_qwen_sglang.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/test_qwen_sglang.sh -------------------------------------------------------------------------------- /recipe/gkd/test_teacher_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/gkd/test_teacher_server.py -------------------------------------------------------------------------------- /recipe/infigui-g1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/infigui-g1/README.md -------------------------------------------------------------------------------- /recipe/infigui-g1/reward_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/infigui-g1/reward_fn.py -------------------------------------------------------------------------------- /recipe/infigui-g1/run_3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/infigui-g1/run_3b.sh -------------------------------------------------------------------------------- /recipe/infigui-g1/run_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/infigui-g1/run_7b.sh -------------------------------------------------------------------------------- /recipe/minicpmo/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/minicpmo/rl_dataset.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/README.md -------------------------------------------------------------------------------- /recipe/one_step_off_policy/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/distributed_util.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/fsdp_workers.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/main_ppo.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/megatron_workers.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/ray_trainer.py -------------------------------------------------------------------------------- /recipe/one_step_off_policy/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/one_step_off_policy/utils.py -------------------------------------------------------------------------------- /recipe/open_math_reasoning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/open_math_reasoning/README.md -------------------------------------------------------------------------------- /recipe/open_math_reasoning/compute_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/open_math_reasoning/compute_score.py -------------------------------------------------------------------------------- /recipe/open_math_reasoning/run_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/open_math_reasoning/run_eval.sh -------------------------------------------------------------------------------- /recipe/open_math_reasoning/run_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/open_math_reasoning/run_generation.sh -------------------------------------------------------------------------------- /recipe/open_math_reasoning/run_sft_qwen3_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/open_math_reasoning/run_sft_qwen3_8b.sh -------------------------------------------------------------------------------- /recipe/prime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/__init__.py -------------------------------------------------------------------------------- /recipe/prime/config/prime_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/config/prime_trainer.yaml -------------------------------------------------------------------------------- /recipe/prime/main_prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/main_prime.py -------------------------------------------------------------------------------- /recipe/prime/prime_core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/prime_core_algos.py -------------------------------------------------------------------------------- /recipe/prime/prime_dp_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/prime_dp_rm.py -------------------------------------------------------------------------------- /recipe/prime/prime_fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/prime_fsdp_workers.py -------------------------------------------------------------------------------- /recipe/prime/prime_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/prime_ray_trainer.py -------------------------------------------------------------------------------- /recipe/prime/run_prime_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/run_prime_qwen.sh -------------------------------------------------------------------------------- /recipe/prime/run_prime_qwen_code.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/prime/run_prime_qwen_code.sh -------------------------------------------------------------------------------- /recipe/r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/README.md -------------------------------------------------------------------------------- /recipe/r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/__init__.py -------------------------------------------------------------------------------- /recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/config/evaluation.yaml -------------------------------------------------------------------------------- /recipe/r1/data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/data_process.py -------------------------------------------------------------------------------- /recipe/r1/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/main_eval.py -------------------------------------------------------------------------------- /recipe/r1/reward_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/reward_score.py -------------------------------------------------------------------------------- /recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/run_r1_distill_qwen.sh -------------------------------------------------------------------------------- /recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/tasks/__init__.py -------------------------------------------------------------------------------- /recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/tasks/gpqa.py -------------------------------------------------------------------------------- /recipe/r1/tasks/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/tasks/livecodebench.py -------------------------------------------------------------------------------- /recipe/r1/tasks/math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1/tasks/math_reward.py -------------------------------------------------------------------------------- /recipe/r1_ascend/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/README.md -------------------------------------------------------------------------------- /recipe/r1_ascend/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/README_zh.md -------------------------------------------------------------------------------- /recipe/r1_ascend/deepscaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/deepscaler.py -------------------------------------------------------------------------------- /recipe/r1_ascend/engine_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/engine_core.py -------------------------------------------------------------------------------- /recipe/r1_ascend/figures/response_len.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/figures/response_len.png -------------------------------------------------------------------------------- /recipe/r1_ascend/figures/rewards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/figures/rewards.png -------------------------------------------------------------------------------- /recipe/r1_ascend/figures/val_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/figures/val_score.png -------------------------------------------------------------------------------- /recipe/r1_ascend/json_to_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/json_to_parquet.py -------------------------------------------------------------------------------- /recipe/r1_ascend/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/main_ppo.py -------------------------------------------------------------------------------- /recipe/r1_ascend/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/megatron_workers.py -------------------------------------------------------------------------------- /recipe/r1_ascend/ray_start_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/ray_start_grpo_npu.sh -------------------------------------------------------------------------------- /recipe/r1_ascend/vllm_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/vllm_parallel_state.py -------------------------------------------------------------------------------- /recipe/r1_ascend/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/r1_ascend/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /recipe/spin/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/README.md -------------------------------------------------------------------------------- /recipe/spin/config/spin_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/config/spin_trainer.yaml -------------------------------------------------------------------------------- /recipe/spin/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/core_algos.py -------------------------------------------------------------------------------- /recipe/spin/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/dp_actor.py -------------------------------------------------------------------------------- /recipe/spin/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/fsdp_workers.py -------------------------------------------------------------------------------- /recipe/spin/main_spin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/main_spin.py -------------------------------------------------------------------------------- /recipe/spin/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/run_spin.sh -------------------------------------------------------------------------------- /recipe/spin/spin_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/spin_trainer.py -------------------------------------------------------------------------------- /recipe/spin/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/spin/utils.py -------------------------------------------------------------------------------- /recipe/sppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/README.md -------------------------------------------------------------------------------- /recipe/sppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/__init__.py -------------------------------------------------------------------------------- /recipe/sppo/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/config.py -------------------------------------------------------------------------------- /recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/config/sppo_trainer.yaml -------------------------------------------------------------------------------- /recipe/sppo/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/dp_actor.py -------------------------------------------------------------------------------- /recipe/sppo/main_sppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/main_sppo.py -------------------------------------------------------------------------------- /recipe/sppo/run_qwen2.5-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/run_qwen2.5-7b_rm.sh -------------------------------------------------------------------------------- /recipe/sppo/sppo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/sppo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/sppo/sppo_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/sppo/sppo_worker.py -------------------------------------------------------------------------------- /recipe/transfer_queue/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/transfer_queue/agent_loop.py -------------------------------------------------------------------------------- /recipe/transfer_queue/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/transfer_queue/main_ppo.py -------------------------------------------------------------------------------- /recipe/transfer_queue/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/transfer_queue/ray_trainer.py -------------------------------------------------------------------------------- /recipe/vla/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/README.md -------------------------------------------------------------------------------- /recipe/vla/config/rob_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/config/rob_ppo_trainer.yaml -------------------------------------------------------------------------------- /recipe/vla/dp_rob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/dp_rob.py -------------------------------------------------------------------------------- /recipe/vla/env_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/env_loop.py -------------------------------------------------------------------------------- /recipe/vla/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/__init__.py -------------------------------------------------------------------------------- /recipe/vla/envs/action_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/action_utils.py -------------------------------------------------------------------------------- /recipe/vla/envs/isaac_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/isaac_env/__init__.py -------------------------------------------------------------------------------- /recipe/vla/envs/isaac_env/isaac_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/isaac_env/isaac_env.py -------------------------------------------------------------------------------- /recipe/vla/envs/libero_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/libero_env/__init__.py -------------------------------------------------------------------------------- /recipe/vla/envs/libero_env/libero_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/libero_env/libero_env.py -------------------------------------------------------------------------------- /recipe/vla/envs/libero_env/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/libero_env/utils.py -------------------------------------------------------------------------------- /recipe/vla/envs/libero_env/venv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/envs/libero_env/venv.py -------------------------------------------------------------------------------- /recipe/vla/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/fsdp_workers.py -------------------------------------------------------------------------------- /recipe/vla/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/main_ppo.py -------------------------------------------------------------------------------- /recipe/vla/models/openvla_oft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/models/openvla_oft/__init__.py -------------------------------------------------------------------------------- /recipe/vla/models/openvla_oft/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/models/openvla_oft/constants.py -------------------------------------------------------------------------------- /recipe/vla/models/openvla_oft/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/models/openvla_oft/train_utils.py -------------------------------------------------------------------------------- /recipe/vla/naive_rollout_rob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/naive_rollout_rob.py -------------------------------------------------------------------------------- /recipe/vla/prepare_libero_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/prepare_libero_dataset.py -------------------------------------------------------------------------------- /recipe/vla/requirements_vla.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/requirements_vla.txt -------------------------------------------------------------------------------- /recipe/vla/rob_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/rob_ray_trainer.py -------------------------------------------------------------------------------- /recipe/vla/run_simpleVLA_isaac_disagg.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/run_simpleVLA_isaac_disagg.sh -------------------------------------------------------------------------------- /recipe/vla/run_simpleVLA_libero_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/run_simpleVLA_libero_grpo.sh -------------------------------------------------------------------------------- /recipe/vla/workers/env/env_loop_wg_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/workers/env/env_loop_wg_test.py -------------------------------------------------------------------------------- /recipe/vla/workers/env/env_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/workers/env/env_manager.py -------------------------------------------------------------------------------- /recipe/vla/workers/env/env_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/recipe/vla/workers/env/env_worker.py -------------------------------------------------------------------------------- /requirements-cuda.txt: -------------------------------------------------------------------------------- 1 | flash-attn -------------------------------------------------------------------------------- /requirements-npu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/requirements-npu.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/requirements_sglang.txt -------------------------------------------------------------------------------- /requirements_transferqueue.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/requirements_transferqueue.txt -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/diagnose.py -------------------------------------------------------------------------------- /scripts/generate_trainer_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/generate_trainer_config.sh -------------------------------------------------------------------------------- /scripts/init_random_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/init_random_model.py -------------------------------------------------------------------------------- /scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/install_vllm_sglang_mcore.sh -------------------------------------------------------------------------------- /scripts/legacy_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/legacy_model_merger.py -------------------------------------------------------------------------------- /scripts/print_cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/print_cfg.py -------------------------------------------------------------------------------- /scripts/rollout_viewer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/scripts/rollout_viewer.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/setup.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/README.md -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/experimental/agent_loop/agent_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/experimental/agent_loop/agent_utils.py -------------------------------------------------------------------------------- /tests/experimental/reward/reward_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/experimental/reward/reward_fn.py -------------------------------------------------------------------------------- /tests/experimental/vla/test_sim_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/experimental/vla/test_sim_envs.py -------------------------------------------------------------------------------- /tests/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/interactions/__init__.py -------------------------------------------------------------------------------- /tests/interactions/test_gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/interactions/test_gsm8k_interaction.py -------------------------------------------------------------------------------- /tests/interactions/test_interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/interactions/test_interaction_registry.py -------------------------------------------------------------------------------- /tests/kill_github_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/kill_github_tests.sh -------------------------------------------------------------------------------- /tests/models/test_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/models/test_engine.py -------------------------------------------------------------------------------- /tests/models/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/models/test_transformer.py -------------------------------------------------------------------------------- /tests/models/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/models/test_transformers_ulysses.py -------------------------------------------------------------------------------- /tests/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/__init__.py -------------------------------------------------------------------------------- /tests/single_controller/base/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/base/test_decorator.py -------------------------------------------------------------------------------- /tests/single_controller/detached_worker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/detached_worker/run.sh -------------------------------------------------------------------------------- /tests/single_controller/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_data_transfer.py -------------------------------------------------------------------------------- /tests/single_controller/test_decorator_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_decorator_on_cpu.py -------------------------------------------------------------------------------- /tests/single_controller/test_nested_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_nested_worker.py -------------------------------------------------------------------------------- /tests/single_controller/test_ray_collectives.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_ray_collectives.py -------------------------------------------------------------------------------- /tests/single_controller/test_ray_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_ray_utils_on_cpu.py -------------------------------------------------------------------------------- /tests/single_controller/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/single_controller/test_rvdz.py -------------------------------------------------------------------------------- /tests/special_distributed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_distributed/README.md -------------------------------------------------------------------------------- /tests/special_distributed/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_distributed/run_all.sh -------------------------------------------------------------------------------- /tests/special_distributed/test_fsdp_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_distributed/test_fsdp_ckpt.py -------------------------------------------------------------------------------- /tests/special_distributed/test_tensor_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /tests/special_e2e/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/README.md -------------------------------------------------------------------------------- /tests/special_e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/__init__.py -------------------------------------------------------------------------------- /tests/special_e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/check_custom_rwd_fn.py -------------------------------------------------------------------------------- /tests/special_e2e/check_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/check_results.py -------------------------------------------------------------------------------- /tests/special_e2e/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/envs/__init__.py -------------------------------------------------------------------------------- /tests/special_e2e/envs/digit_completion/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/envs/digit_completion/task.py -------------------------------------------------------------------------------- /tests/special_e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/generation/run_gen_qwen05.sh -------------------------------------------------------------------------------- /tests/special_e2e/ppo_trainer/run_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/ppo_trainer/run_single_gpu.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_dapo.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_fully_async_policy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_fully_async_policy.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_genrm_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_genrm_remote.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_grpo_lora_with_merge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_grpo_lora_with_merge.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_one_step_off_policy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_one_step_off_policy.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_ppo_trainer_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_ppo_trainer_megatron.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_prime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_prime.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_spin.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_sppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_sppo.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_test.sh -------------------------------------------------------------------------------- /tests/special_e2e/run_transferqueue.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/run_transferqueue.sh -------------------------------------------------------------------------------- /tests/special_e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/sft/run_sft.sh -------------------------------------------------------------------------------- /tests/special_e2e/sft/run_sft_engine_gsm8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/sft/run_sft_engine_gsm8k.sh -------------------------------------------------------------------------------- /tests/special_e2e/sft/test_sft_engine_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/sft/test_sft_engine_all.sh -------------------------------------------------------------------------------- /tests/special_e2e/sft/test_sp_loss_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_e2e/sft/test_sp_loss_match.py -------------------------------------------------------------------------------- /tests/special_npu/run_qwen2_5_05b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_npu/run_qwen2_5_05b_dapo.sh -------------------------------------------------------------------------------- /tests/special_npu/run_qwen2_5_05b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_npu/run_qwen2_5_05b_grpo.sh -------------------------------------------------------------------------------- /tests/special_npu/run_qwen2_5_vl_3b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_npu/run_qwen2_5_vl_3b_npu.sh -------------------------------------------------------------------------------- /tests/special_npu/run_qwen3_06b_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_npu/run_qwen3_06b_ppo.sh -------------------------------------------------------------------------------- /tests/special_sanity/check_api_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_api_docs.py -------------------------------------------------------------------------------- /tests/special_sanity/check_dataproto_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_dataproto_usage.py -------------------------------------------------------------------------------- /tests/special_sanity/check_device_api_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_device_api_usage.py -------------------------------------------------------------------------------- /tests/special_sanity/check_docs_time_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_docs_time_info.py -------------------------------------------------------------------------------- /tests/special_sanity/check_docstrings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_docstrings.py -------------------------------------------------------------------------------- /tests/special_sanity/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_license.py -------------------------------------------------------------------------------- /tests/special_sanity/check_pr_description.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_pr_description.py -------------------------------------------------------------------------------- /tests/special_sanity/check_pr_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/check_pr_title.py -------------------------------------------------------------------------------- /tests/special_sanity/test_config_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/test_config_docs.py -------------------------------------------------------------------------------- /tests/special_sanity/test_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/test_import.py -------------------------------------------------------------------------------- /tests/special_sanity/type_coverage_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/type_coverage_check.py -------------------------------------------------------------------------------- /tests/special_sanity/validate_imported_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/validate_imported_docs.py -------------------------------------------------------------------------------- /tests/special_sanity/validate_structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_sanity/validate_structure.py -------------------------------------------------------------------------------- /tests/special_standalone/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_standalone/README.md -------------------------------------------------------------------------------- /tests/special_standalone/test_memory_buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/special_standalone/test_memory_buffers.py -------------------------------------------------------------------------------- /tests/test_base_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/test_base_config_on_cpu.py -------------------------------------------------------------------------------- /tests/test_protocol_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/test_protocol_on_cpu.py -------------------------------------------------------------------------------- /tests/test_protocol_v2_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/test_protocol_v2_on_cpu.py -------------------------------------------------------------------------------- /tests/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/__init__.py -------------------------------------------------------------------------------- /tests/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/config/__init__.py -------------------------------------------------------------------------------- /tests/trainer/config/legacy_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/config/legacy_ppo_trainer.yaml -------------------------------------------------------------------------------- /tests/trainer/config/test_algo_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/config/test_algo_config_on_cpu.py -------------------------------------------------------------------------------- /tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /tests/trainer/ppo/test_core_algos_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/ppo/test_core_algos_on_cpu.py -------------------------------------------------------------------------------- /tests/trainer/ppo/test_metric_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/ppo/test_metric_utils_on_cpu.py -------------------------------------------------------------------------------- /tests/trainer/ppo/test_rollout_corr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/trainer/ppo/test_rollout_corr.py -------------------------------------------------------------------------------- /tests/utils/_test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/_test_module.py -------------------------------------------------------------------------------- /tests/utils/ckpt/test_esi_save_ckpt_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/ckpt/test_esi_save_ckpt_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/dataset/test_rl_collate_fn_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/dataset/test_rl_collate_fn_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/dataset/test_rl_dataset_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/dataset/test_rl_dataset_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/dataset/test_sft_dataset_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/dataset/test_sft_dataset_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/debug/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/debug/test_metrics.py -------------------------------------------------------------------------------- /tests/utils/megatron/test_pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/megatron/test_pipeline_parallel.py -------------------------------------------------------------------------------- /tests/utils/reward_score/test_sandbox_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/reward_score/test_sandbox_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_activation_offload.py -------------------------------------------------------------------------------- /tests/utils/test_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_config_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_flops_counter.py -------------------------------------------------------------------------------- /tests/utils/test_fs_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_fs_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_groupwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_groupwise.py -------------------------------------------------------------------------------- /tests/utils/test_import_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_import_utils_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_linear_cross_entropy.py -------------------------------------------------------------------------------- /tests/utils/test_mlflow_key_sanitization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_mlflow_key_sanitization.py -------------------------------------------------------------------------------- /tests/utils/test_model_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_model_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_nvtx_profile.py -------------------------------------------------------------------------------- /tests/utils/test_rollout_skip_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_rollout_skip_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_rollout_trace_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_rollout_trace_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_seqlen_balancing.py -------------------------------------------------------------------------------- /tests/utils/test_special_mstx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_special_mstx_profile.py -------------------------------------------------------------------------------- /tests/utils/test_temp_env_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_temp_env_on_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_timeout_decorator_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_timeout_decorator_cpu.py -------------------------------------------------------------------------------- /tests/utils/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/utils/test_torch_functional.py -------------------------------------------------------------------------------- /tests/workers/actor/test_special_dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/actor/test_special_dp_actor.py -------------------------------------------------------------------------------- /tests/workers/config/test_actor_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/config/test_actor_config_on_cpu.py -------------------------------------------------------------------------------- /tests/workers/config/test_optim_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/config/test_optim_config_on_cpu.py -------------------------------------------------------------------------------- /tests/workers/critic/test_special_dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/critic/test_special_dp_critic.py -------------------------------------------------------------------------------- /tests/workers/rollout/perf/vllm_async_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/rollout/perf/vllm_async_rollout.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/rollout/test_hf_rollout.py -------------------------------------------------------------------------------- /tests/workers/test_fsdp_attn_implementation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/test_fsdp_attn_implementation.py -------------------------------------------------------------------------------- /tests/workers/test_fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/tests/workers/test_fsdp_workers.py -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/__init__.py -------------------------------------------------------------------------------- /verl/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/base_config.py -------------------------------------------------------------------------------- /verl/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/__init__.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/__init__.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/agent_loop.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/prometheus_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/prometheus_utils.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/tool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/tool_agent_loop.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/tool_parser.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/agent_loop/utils.py -------------------------------------------------------------------------------- /verl/experimental/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/dataset/__init__.py -------------------------------------------------------------------------------- /verl/experimental/dataset/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/dataset/sampler.py -------------------------------------------------------------------------------- /verl/experimental/dynamic_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/dynamic_dataset/__init__.py -------------------------------------------------------------------------------- /verl/experimental/reward/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/__init__.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/__init__.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/base.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/dapo.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/limited.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/limited.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/naive.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_loop/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_loop/registry.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_manager.py -------------------------------------------------------------------------------- /verl/experimental/reward/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/reward_model.py -------------------------------------------------------------------------------- /verl/experimental/reward/router/naive_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/experimental/reward/router/naive_router.py -------------------------------------------------------------------------------- /verl/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/__init__.py -------------------------------------------------------------------------------- /verl/interactions/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/base.py -------------------------------------------------------------------------------- /verl/interactions/gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/gsm8k_interaction.py -------------------------------------------------------------------------------- /verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/utils/__init__.py -------------------------------------------------------------------------------- /verl/interactions/utils/interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/utils/interaction_registry.py -------------------------------------------------------------------------------- /verl/interactions/weather_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/interactions/weather_interaction.py -------------------------------------------------------------------------------- /verl/model_merger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/model_merger/__init__.py -------------------------------------------------------------------------------- /verl/model_merger/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/model_merger/__main__.py -------------------------------------------------------------------------------- /verl/model_merger/base_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/model_merger/base_model_merger.py -------------------------------------------------------------------------------- /verl/model_merger/fsdp_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/model_merger/fsdp_model_merger.py -------------------------------------------------------------------------------- /verl/model_merger/megatron_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/model_merger/megatron_model_merger.py -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/README.md -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/bridge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/bridge.py -------------------------------------------------------------------------------- /verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/mbridge.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward_1f1b_overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/model_forward_1f1b_overlap.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/model_forward_fused.py -------------------------------------------------------------------------------- /verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /verl/models/mcore/patch_v012.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/patch_v012.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/attention.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/model.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/rope_utils.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/vision_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/vision_config.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/vision_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/qwen2_5_vl/vision_model.py -------------------------------------------------------------------------------- /verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/util.py -------------------------------------------------------------------------------- /verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/registry.py -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /verl/models/transformers/apertus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/apertus.py -------------------------------------------------------------------------------- /verl/models/transformers/dense_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/dense_common.py -------------------------------------------------------------------------------- /verl/models/transformers/glm4v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/glm4v.py -------------------------------------------------------------------------------- /verl/models/transformers/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/kimi_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/npu_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/transformers/qwen3_vl.py -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/protocol.py -------------------------------------------------------------------------------- /verl/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/__init__.py -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /verl/third_party/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/torch/__init__.py -------------------------------------------------------------------------------- /verl/third_party/torch/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/torch/distributed/__init__.py -------------------------------------------------------------------------------- /verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/__init__.py -------------------------------------------------------------------------------- /verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/base_tool.py -------------------------------------------------------------------------------- /verl/tools/geo3k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/geo3k_tool.py -------------------------------------------------------------------------------- /verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /verl/tools/image_zoom_in_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/image_zoom_in_tool.py -------------------------------------------------------------------------------- /verl/tools/mcp_base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/mcp_base_tool.py -------------------------------------------------------------------------------- /verl/tools/mcp_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/mcp_search_tool.py -------------------------------------------------------------------------------- /verl/tools/sandbox_fusion_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/sandbox_fusion_tools.py -------------------------------------------------------------------------------- /verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/schemas.py -------------------------------------------------------------------------------- /verl/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/search_tool.py -------------------------------------------------------------------------------- /verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/utils/__init__.py -------------------------------------------------------------------------------- /verl/tools/utils/mcp_clients/McpClientManager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/utils/mcp_clients/McpClientManager.py -------------------------------------------------------------------------------- /verl/tools/utils/mcp_clients/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/utils/mcp_clients/utils.py -------------------------------------------------------------------------------- /verl/tools/utils/search_r1_like_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/utils/search_r1_like_utils.py -------------------------------------------------------------------------------- /verl/tools/utils/tool_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/tools/utils/tool_registry.py -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/__init__.py -------------------------------------------------------------------------------- /verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/__init__.py -------------------------------------------------------------------------------- /verl/trainer/config/_generated_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/_generated_ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/actor/actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/actor/dp_actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/megatron_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/actor/megatron_actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/algorithm.py -------------------------------------------------------------------------------- /verl/trainer/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/config.py -------------------------------------------------------------------------------- /verl/trainer/config/critic/critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/critic/critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/critic/dp_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/critic/dp_critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/critic/megatron_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/critic/megatron_critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/data/legacy_data.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/data/legacy_data.yaml -------------------------------------------------------------------------------- /verl/trainer/config/engine/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/engine/fsdp.yaml -------------------------------------------------------------------------------- /verl/trainer/config/engine/megatron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/engine/megatron.yaml -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/model/hf_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/model/hf_model.yaml -------------------------------------------------------------------------------- /verl/trainer/config/optim/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/optim/fsdp.yaml -------------------------------------------------------------------------------- /verl/trainer/config/optim/megatron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/optim/megatron.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/ref/dp_ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/ref/megatron_ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/ref/ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/reward_manager.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/reward_manager.yaml -------------------------------------------------------------------------------- /verl/trainer/config/rollout/rollout.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/rollout/rollout.yaml -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer_engine.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/config/sft_trainer_engine.yaml -------------------------------------------------------------------------------- /verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/constants_ppo.py -------------------------------------------------------------------------------- /verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /verl/trainer/main_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/main_generation_server.py -------------------------------------------------------------------------------- /verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /verl/trainer/ppo/rollout_corr_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/rollout_corr_helper.py -------------------------------------------------------------------------------- /verl/trainer/ppo/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/ppo/utils.py -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /verl/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/sft_trainer.py -------------------------------------------------------------------------------- /verl/trainer/sft_trainer_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/trainer/sft_trainer_ray.py -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/__init__.py -------------------------------------------------------------------------------- /verl/utils/activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/activation_offload.py -------------------------------------------------------------------------------- /verl/utils/attention_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/attention_utils.py -------------------------------------------------------------------------------- /verl/utils/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/chat_template.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/checkpoint_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/checkpoint/checkpoint_handler.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/config.py -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /verl/utils/dataset/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/dataset_utils.py -------------------------------------------------------------------------------- /verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /verl/utils/debug/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/debug/metrics.py -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /verl/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/device.py -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/distributed.py -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/fs.py -------------------------------------------------------------------------------- /verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /verl/utils/groupwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/groupwise.py -------------------------------------------------------------------------------- /verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/import_utils.py -------------------------------------------------------------------------------- /verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/kernel/__init__.py -------------------------------------------------------------------------------- /verl/utils/kernel/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/kernel/kernels.py -------------------------------------------------------------------------------- /verl/utils/kernel/linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/kernel/linear_cross_entropy.py -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/dist_checkpointing.py -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/router_replay_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/router_replay_patch.py -------------------------------------------------------------------------------- /verl/utils/megatron/router_replay_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/router_replay_utils.py -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron_peft_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron_peft_utils.py -------------------------------------------------------------------------------- /verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /verl/utils/memory_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/memory_utils.py -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/model.py -------------------------------------------------------------------------------- /verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/net_utils.py -------------------------------------------------------------------------------- /verl/utils/npu_flash_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/npu_flash_attn_utils.py -------------------------------------------------------------------------------- /verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/__init__.py -------------------------------------------------------------------------------- /verl/utils/profiler/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/config.py -------------------------------------------------------------------------------- /verl/utils/profiler/empty_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/empty_annotations.py -------------------------------------------------------------------------------- /verl/utils/profiler/mstx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/mstx_profile.py -------------------------------------------------------------------------------- /verl/utils/profiler/nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/nvtx_profile.py -------------------------------------------------------------------------------- /verl/utils/profiler/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/performance.py -------------------------------------------------------------------------------- /verl/utils/profiler/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/profiler/profile.py -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/py_functional.py -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/math_reward.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/prime_code/README.md -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /verl/utils/rollout_skip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/rollout_skip.py -------------------------------------------------------------------------------- /verl/utils/rollout_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/rollout_trace.py -------------------------------------------------------------------------------- /verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /verl/utils/tensordict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/tensordict_utils.py -------------------------------------------------------------------------------- /verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/tracking.py -------------------------------------------------------------------------------- /verl/utils/transferqueue_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/transferqueue_utils.py -------------------------------------------------------------------------------- /verl/utils/transformers_compat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/transformers_compat.py -------------------------------------------------------------------------------- /verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/ulysses.py -------------------------------------------------------------------------------- /verl/utils/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/vllm/__init__.py -------------------------------------------------------------------------------- /verl/utils/vllm/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/vllm/patch.py -------------------------------------------------------------------------------- /verl/utils/vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/vllm/utils.py -------------------------------------------------------------------------------- /verl/utils/vllm/vllm_fp8_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/utils/vllm/vllm_fp8_utils.py -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.7.0.dev 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/actor/base.py -------------------------------------------------------------------------------- /verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /verl/workers/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/__init__.py -------------------------------------------------------------------------------- /verl/workers/config/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/actor.py -------------------------------------------------------------------------------- /verl/workers/config/critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/critic.py -------------------------------------------------------------------------------- /verl/workers/config/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/engine.py -------------------------------------------------------------------------------- /verl/workers/config/megatron_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/megatron_peft.py -------------------------------------------------------------------------------- /verl/workers/config/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/model.py -------------------------------------------------------------------------------- /verl/workers/config/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/optimizer.py -------------------------------------------------------------------------------- /verl/workers/config/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/reward_model.py -------------------------------------------------------------------------------- /verl/workers/config/rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/config/rollout.py -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/critic/base.py -------------------------------------------------------------------------------- /verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /verl/workers/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/base.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/fsdp/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/transformer_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/fsdp/transformer_impl.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/fsdp/utils.py -------------------------------------------------------------------------------- /verl/workers/engine/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/megatron/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/megatron/utils.py -------------------------------------------------------------------------------- /verl/workers/engine/mindspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/mindspeed/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine/utils.py -------------------------------------------------------------------------------- /verl/workers/engine_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/engine_workers.py -------------------------------------------------------------------------------- /verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/abstract.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_manager/registry.py -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/replica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/replica.py -------------------------------------------------------------------------------- /verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/sglang_rollout/utils.py -------------------------------------------------------------------------------- /verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /verl/workers/rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/utils.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/rollout/vllm_rollout/utils.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /verl/workers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/utils/__init__.py -------------------------------------------------------------------------------- /verl/workers/utils/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/utils/losses.py -------------------------------------------------------------------------------- /verl/workers/utils/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/verl/HEAD/verl/workers/utils/padding.py --------------------------------------------------------------------------------