├── .gitignore ├── LICENSE ├── README.md ├── assets ├── rl-factory.png └── we_group3.jpg ├── docker ├── Apptainerfile.rocm ├── Dockerfile.megatron ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang ├── Dockerfile.vemlp.vllm.te ├── Dockerfile.vllm.sglang.megatron └── Dockfile.ngc.vllm0.8 ├── docs ├── MCP_CACHE_README.md ├── Makefile ├── README.md ├── README_android.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ ├── js │ │ └── runllm-widget.js │ └── logo.png ├── advance │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ ├── placement.rst │ └── rope.rst ├── amd_tutorial │ ├── amd_build_dockerfile.md │ ├── amd_build_dockerfile_page.rst │ ├── amd_existing_docker.md │ └── amd_vllm_page.rst ├── api │ ├── trainer.rst │ └── utils.rst ├── conf.py ├── data.rst ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ ├── multi_modal_example.rst │ ├── ppo_code_architecture.rst │ └── sandbox_fusion_example.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ ├── device_tuning.rst │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── rl_factory │ ├── en │ │ ├── centralized_tool_manager.md │ │ ├── framework_design.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md │ ├── main_tutorial.md │ ├── main_tutorial_zh.md │ └── zh │ │ ├── README.md │ │ ├── main_tutorial.md │ │ ├── rewards.md │ │ └── tools.md ├── sglang_multiturn │ └── multiturn.rst ├── start │ ├── install.rst │ ├── multinode.rst │ ├── quickstart.rst │ └── ray_debug_tutorial.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ ├── ray_trainer.rst │ └── sglang_worker.rst ├── environments ├── README_android.md ├── adv.sh ├── android_control.py ├── base.py ├── env_manager.py ├── env_package │ └── android │ │ ├── env_config.py │ │ ├── __init__.py │ │ ├── android_env.py │ │ ├── base_env.py │ │ ├── base_env_config.py │ │ ├── env_config.py │ │ ├── envs.py │ │ ├── reward_evaluator.py │ │ └── utils │ │ ├── __init__.py │ │ ├── android_utils.py │ │ ├── context_utils.py │ │ ├── logging_utils.py │ │ └── parse_utils.py ├── pool_api.py ├── prompts │ ├── __init__.py │ └── android.py ├── redis_port.py └── start_emulators.sh ├── envs ├── __init__.py ├── base.py ├── configs │ ├── calculator.json │ ├── chat_template.jinja │ ├── mcp_tools.pydata │ ├── mcp_vision_tools.pydata │ └── sse_mcp_tools.pydata ├── mmbase.py ├── reward_rollout_example.py ├── search.py ├── storage │ ├── CacheMe.md │ ├── __init__.py │ ├── cache │ │ ├── cache_base.py │ │ └── cachebox_cache.py │ ├── distributed_cache_system.png │ ├── manager │ │ └── storage_manager.py │ ├── persist │ │ ├── disk_persist.py │ │ └── persist_base.py │ └── test │ │ └── storage_test.py ├── tool_manager │ ├── __init__.py │ ├── base_manager.py │ ├── centralized │ │ └── centralized_qwen3_manager.py │ ├── config_manager.py │ ├── llama3_manager.py │ ├── mm_base_manager.py │ ├── qwen2_5_manager.py │ ├── qwen2_5_vl_manager.py │ └── qwen3_manager.py ├── tools │ ├── rotate.py │ └── search.py ├── utils │ ├── async_mcp_manager.py │ ├── concurrency_limiter.py │ ├── get_prompt.py │ ├── mcp_manager.py │ ├── mm_tool_utils.py │ ├── redis_cache_manager.py │ ├── schema.py │ ├── suppress_mcp_warnings.py │ ├── tool_utils.py │ └── util.py └── vision.py ├── examples ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ └── run_qwen_megatron_ckpt.sh ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── gsm8k_multiturn_w_tool.py │ ├── hellaswag.py │ ├── math_dataset.py │ ├── multiturn.py │ └── process_vl.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sgl_megatron.sh │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ ├── run_qwen2_5_vl-7b.sh │ └── run_qwen3-8b.sh ├── ppo_trainer │ ├── naive_chat_scheduler.py │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── rl_factory │ ├── centralized_tool_manager.sh │ ├── qwen25vl_tool.sh │ ├── reward_rollout_test.sh │ └── search_ppo.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ └── multiturn │ │ └── run_qwen_05_sp2.sh ├── sglang_multiturn │ ├── README.md │ ├── config │ │ ├── gsm8k_multiturn_grpo.yaml │ │ └── tool_config │ │ │ └── gsm8k_tool_config.yaml │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ └── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh ├── slurm │ └── ray_on_slurm.slurm ├── split_placement │ ├── README.md │ ├── config │ │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py └── tuning │ ├── 14b │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ ├── 32b │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ ├── 70b │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ └── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ └── 7b │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── generator ├── __init__.py ├── api_generator.py └── base_generator.py ├── install.sh ├── main_eval.sh ├── main_grpo.sh ├── main_grpo_megatron.sh ├── main_gspo_megatron.sh ├── main_ppo.sh ├── project └── test_questions.json ├── pyproject.toml ├── rag_server ├── README.md ├── data_process │ └── nq_search.py ├── download.py ├── launch.sh └── retrieval_server.py ├── recipe ├── dapo │ ├── README.md │ ├── config │ │ └── dapo_trainer.yaml │ ├── dapo_ray_trainer.py │ ├── main_dapo.py │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_0.5b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ └── test_dapo_7b.sh ├── drgrpo │ └── README.md ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ └── run_prime_qwen.sh ├── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ │ ├── __init__.py │ │ ├── gpqa.py │ │ ├── livecodebench.py │ │ └── math.py ├── sppo │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── sppo_trainer.yaml │ ├── dp_actor.py │ ├── main_sppo.py │ ├── run_qwen2.5-7b_rm.sh │ ├── sppo_ray_trainer.py │ └── sppo_worker.py └── travelplanner │ ├── README_TravelPlanner.md │ ├── env │ └── travelplanner.py │ ├── main_grpo.sh │ ├── reward_score │ ├── __init__.py │ ├── database │ │ ├── accommodations │ │ │ └── clean_accommodations_2022.csv │ │ ├── attractions │ │ │ └── attractions.csv │ │ ├── background │ │ │ ├── citySet.txt │ │ │ ├── citySet_with_states.txt │ │ │ └── stateSet.txt │ │ ├── googleDistanceMatrix │ │ │ └── distance.csv │ │ └── restaurants │ │ │ └── clean_restaurant_2022.csv │ ├── evaluation │ │ ├── __init__.py │ │ ├── commonsense_constraint.py │ │ ├── custom_eval.py │ │ ├── eval.py │ │ └── hard_constraint.py │ ├── hard_constraints.py │ ├── reward_test.py │ ├── tools │ │ ├── __init__.py │ │ ├── accommodations │ │ │ ├── __init__.py │ │ │ └── apis.py │ │ ├── attractions │ │ │ └── apis.py │ │ ├── cities │ │ │ └── apis.py │ │ ├── flights │ │ │ ├── __init__.py │ │ │ └── apis.py │ │ ├── googleDistanceMatrix │ │ │ └── apis.py │ │ ├── notebook │ │ │ ├── apis.py │ │ │ └── test.py │ │ ├── planner │ │ │ ├── apis.py │ │ │ ├── env.py │ │ │ ├── sole_planning.py │ │ │ └── test.py │ │ └── restaurants │ │ │ ├── __init__.py │ │ │ └── apis.py │ └── travel.py │ ├── server │ ├── __init__.py │ ├── accommodation_server.py │ ├── attraction_server.py │ ├── base_server.py │ ├── distance_server.py │ ├── flight_server.py │ ├── launch.sh │ └── restaurant_server.py │ ├── tools │ ├── accommodation.py │ ├── attraction.py │ ├── distance.py │ ├── flight.py │ ├── restaurant.py │ └── test.py │ └── travelplanner_preprocess.py ├── redis_server ├── README.md ├── client.py └── start_redis.sh ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── converter_hf_to_mcore.py ├── diagnose.py ├── format.sh ├── install_nginx.sh ├── install_vllm_sglang_mcore.sh ├── model_merger.py ├── nq_search.py ├── run_vllm_with_nginx.sh └── vllm_server.sh ├── setup.py ├── swift ├── r1_dataset.py ├── r1_reward.py ├── rlfactory_env.json ├── rlfactory_reward.py └── rlfactory_toolcall.py ├── swift_grpo.sh ├── tests ├── __init__.py ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ ├── run_qwen_megatron_ckpt.sh │ └── test_fsdp_ckpt.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── distro │ └── requirements.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── generation │ │ └── run_gen_qwen05.sh │ ├── ppo_trainer │ │ ├── run_function_reward.sh │ │ └── run_model_reward.sh │ ├── run_dapo.sh │ ├── run_deepseek_grpo.sh │ ├── run_deepseek_grpo_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_deepseek_megatron_parallelism.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_ppo_trainer_megatron.sh │ ├── run_prime.sh │ ├── run_qwen2vl_geo3k_function_rm.sh │ ├── run_qwen_grpo.sh │ ├── run_qwen_grpo_megatron.sh │ ├── run_qwen_gsm8k_custom_function_rm.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_gsm8k_prime.sh │ ├── run_qwen_megatron.sh │ ├── run_qwen_megatron_parallelism.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ ├── run_ray_trainer_rmpad.sh │ ├── run_sppo.sh │ ├── run_test.sh │ └── sft │ │ ├── run_sft.sh │ │ └── test_sp_loss_match.py ├── generation │ └── run_gen_qwen05.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kernels │ └── test_linear_cross_entropy.py ├── kill_github_tests.sh ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── models │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── ray_cpu │ ├── check_worker_alive │ │ └── main.py │ ├── test_auto_padding.py │ ├── test_check_worker_alive.py │ ├── test_decorator.py │ ├── test_fused_workers.py │ └── test_ray_local_envs.py ├── ray_gpu │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_colocated_workers.py │ ├── test_colocated_workers_fused.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── reward_score │ └── test_sandbox_fusion.py ├── rl_factory │ ├── envs │ │ └── test_tool_use.py │ ├── generator │ │ ├── async_generator_test.py │ │ ├── async_results.csv │ │ └── test.sh │ ├── rewarder │ │ └── test_parallel.py │ ├── test_async_tool_manager.py │ ├── test_llama3_manager.py │ └── test_qwen3_manager.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_qwen05_peft.sh │ ├── run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── single_controller │ └── base │ │ └── test_decorator.py ├── test_mcp_cache_real.py ├── test_protocol.py ├── trainer │ ├── __init__.py │ └── ppo │ │ ├── __init__.py │ │ └── test_metric_utils.py ├── utility │ └── test_tensor_dict_utilities.py ├── utils │ ├── cpu_tests │ │ ├── test_fs.py │ │ ├── test_import_utils.py │ │ ├── test_model.py │ │ ├── test_module.py │ │ └── test_timeout_decorator.py │ └── gpu_tests │ │ ├── checkpoint │ │ └── test_fsdp_ckpt.py │ │ ├── dataset │ │ ├── test_multiturn_sft_dataset.py │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py │ │ ├── test_flops_counter.py │ │ ├── test_seqlen_balancing.py │ │ └── test_torch_functional.py ├── verl │ └── utils │ │ └── dataset │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py └── workers │ └── rollout │ ├── async_rollout_utils.py │ ├── run_fsdp_vllm.py │ ├── test_hf_rollout.py │ ├── test_sglang_async_rollout_w_tools.py │ ├── test_sglang_async_spmd.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ ├── test_vllm_multi_turn.py │ ├── test_vllm_spmd.py │ ├── test_vllm_tool_calling.py │ └── utils_sglang.py ├── verl ├── __init__.py ├── base_config.py ├── experimental │ ├── __init__.py │ ├── agent_loop │ │ ├── __init__.py │ │ ├── agent_loop.py │ │ ├── single_turn_agent_loop.py │ │ ├── tool_agent_loop.py │ │ └── tool_parser.py │ ├── dataset │ │ ├── __init__.py │ │ └── sampler.py │ └── dynamic_dataset │ │ ├── __init__.py │ │ └── dynamicgen_dataset.py ├── interactions │ ├── __init__.py │ ├── base.py │ ├── gsm8k_interaction.py │ └── utils │ │ ├── __init__.py │ │ └── interaction_registry.py ├── model_merger │ ├── __init__.py │ ├── __main__.py │ ├── base_model_merger.py │ ├── fsdp_model_merger.py │ └── megatron_model_merger.py ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── mbridge.py │ │ ├── model_forward.py │ │ ├── model_forward_fused.py │ │ ├── model_initializer.py │ │ ├── patch_v012.py │ │ ├── qwen2_5_vl │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── model.py │ │ │ ├── rope_utils.py │ │ │ ├── vision_config.py │ │ │ ├── vision_model.py │ │ │ └── vision_transformer_block.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── dense_common.py │ │ ├── kimi_vl.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── npu_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py ├── protocol.py ├── py.typed ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py ├── third_party │ ├── __init__.py │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ ├── torch │ │ ├── __init__.py │ │ └── distributed │ │ │ ├── __init__.py │ │ │ ├── _state_dict_utils.py │ │ │ └── checkpoint │ │ │ ├── __init__.py │ │ │ └── state_dict.py │ └── vllm │ │ └── __init__.py ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── geo3k_tool.py │ ├── gsm8k_tool.py │ ├── mcp_base_tool.py │ ├── mcp_search_tool.py │ ├── sandbox_fusion_tools.py │ ├── schemas.py │ ├── search_tool.py │ └── utils │ │ ├── __init__.py │ │ ├── mcp_clients │ │ ├── McpClientManager.py │ │ └── utils.py │ │ ├── search_r1_like_utils.py │ │ └── tool_registry.py ├── trainer │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── _generated_ppo_megatron_trainer.yaml │ │ ├── _generated_ppo_trainer.yaml │ │ ├── actor │ │ │ ├── actor.yaml │ │ │ ├── dp_actor.yaml │ │ │ └── megatron_actor.yaml │ │ ├── algorithm.py │ │ ├── config.py │ │ ├── critic │ │ │ ├── critic.yaml │ │ │ ├── dp_critic.yaml │ │ │ └── megatron_critic.yaml │ │ ├── data │ │ │ └── legacy_data.yaml │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── npu_profile │ │ │ └── npu_profile.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ ├── ref │ │ │ ├── dp_ref.yaml │ │ │ ├── megatron_ref.yaml │ │ │ └── ref.yaml │ │ ├── reward_model │ │ │ ├── dp_reward_model.yaml │ │ │ ├── megatron_reward_model.yaml │ │ │ └── reward_model.yaml │ │ ├── rl_factory_ppo_megatron_trainer.yaml │ │ ├── rl_factory_ppo_trainer.yaml │ │ ├── rollout │ │ │ ├── reward_rollout.yaml │ │ │ └── rollout.yaml │ │ └── sft_trainer.yaml │ ├── constants_ppo.py │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_evaluate.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── runtime_env.yaml ├── utils │ ├── __init__.py │ ├── activation_offload.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── multiturn_sft_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── empty_annotations.py │ │ ├── nvtx_profile.py │ │ ├── performance.py │ │ └── trajectory_tracker.py │ ├── device.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── kernel │ │ ├── __init__.py │ │ ├── kernels.py │ │ └── linear_cross_entropy.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── dist_checkpointing.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── metric │ │ ├── __init__.py │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── profiler │ │ ├── __init__.py │ │ ├── config.py │ │ ├── empty_annotations.py │ │ ├── mstx_profile.py │ │ ├── nvtx_profile.py │ │ ├── performance.py │ │ └── profile.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ ├── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── search_r1_like_qa_em.py │ ├── rollout_trace.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── ulysses.py │ └── vllm_utils.py ├── version │ └── version └── workers │ ├── __init__.py │ ├── actor │ ├── __init__.py │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── config │ ├── __init__.py │ ├── actor.py │ ├── engine.py │ └── optimizer.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── engine │ ├── __init__.py │ ├── base.py │ ├── fsdp │ │ ├── __init__.py │ │ ├── engine_impl.py │ │ └── utils.py │ └── megatron │ │ ├── __init__.py │ │ └── engine_impl.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── batch.py │ ├── dapo.py │ ├── naive.py │ ├── parallel.py │ ├── prime.py │ └── registry.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── roles │ ├── __init__.py │ ├── actor.py │ └── critic.py │ ├── rollout │ ├── __init__.py │ ├── async_server.py │ ├── base.py │ ├── chat_scheduler.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_server.py │ │ ├── sglang_rollout.py │ │ └── utils.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── vllm_async_server.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── megatron_sglang.py │ └── megatron_vllm.py └── webui ├── README.md ├── app.py ├── components ├── flow_editor.py ├── flow_editor.pyi └── rewards │ └── graders │ ├── __init__.py │ ├── base.py │ ├── graders.py │ └── qwen_math.py ├── requirements.txt ├── rewards └── reward_config.json ├── run_webui.sh └── tabs ├── __init__.py ├── data_processing.py ├── project_management.py ├── reward_definition.py ├── tool_definition.py ├── training_deployment.py └── utils └── get_env_codes.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/README.md -------------------------------------------------------------------------------- /assets/rl-factory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/assets/rl-factory.png -------------------------------------------------------------------------------- /assets/we_group3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/assets/we_group3.jpg -------------------------------------------------------------------------------- /docker/Apptainerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Apptainerfile.rocm -------------------------------------------------------------------------------- /docker/Dockerfile.megatron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.megatron -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.ngc.vllm -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm0.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.ngc.vllm0.8 -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm0.8.sagemaker: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.ngc.vllm0.8.sagemaker -------------------------------------------------------------------------------- /docker/Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.rocm -------------------------------------------------------------------------------- /docker/Dockerfile.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.sglang -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.vemlp.vllm.te -------------------------------------------------------------------------------- /docker/Dockerfile.vllm.sglang.megatron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockerfile.vllm.sglang.megatron -------------------------------------------------------------------------------- /docker/Dockfile.ngc.vllm0.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docker/Dockfile.ngc.vllm0.8 -------------------------------------------------------------------------------- /docs/MCP_CACHE_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/MCP_CACHE_README.md -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/README_android.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/README_android.md -------------------------------------------------------------------------------- /docs/README_vllm0.7.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/README_vllm0.7.md -------------------------------------------------------------------------------- /docs/README_vllm0.8.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/README_vllm0.8.md -------------------------------------------------------------------------------- /docs/_static/js/runllm-widget.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/_static/js/runllm-widget.js -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/checkpoint.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/checkpoint.rst -------------------------------------------------------------------------------- /docs/advance/dpo_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/dpo_extension.rst -------------------------------------------------------------------------------- /docs/advance/fsdp_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/fsdp_extension.rst -------------------------------------------------------------------------------- /docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/megatron_extension.rst -------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/placement.rst -------------------------------------------------------------------------------- /docs/advance/rope.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/advance/rope.rst -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_build_dockerfile.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/amd_tutorial/amd_build_dockerfile.md -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_build_dockerfile_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/amd_tutorial/amd_build_dockerfile_page.rst -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_existing_docker.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/amd_tutorial/amd_existing_docker.md -------------------------------------------------------------------------------- /docs/amd_tutorial/amd_vllm_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/amd_tutorial/amd_vllm_page.rst -------------------------------------------------------------------------------- /docs/api/trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/api/trainer.rst -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/api/utils.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/data.rst -------------------------------------------------------------------------------- /docs/examples/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/examples/config.rst -------------------------------------------------------------------------------- /docs/examples/gsm8k_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/examples/gsm8k_example.rst -------------------------------------------------------------------------------- /docs/examples/multi_modal_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/examples/multi_modal_example.rst -------------------------------------------------------------------------------- /docs/examples/ppo_code_architecture.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/examples/ppo_code_architecture.rst -------------------------------------------------------------------------------- /docs/examples/sandbox_fusion_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/examples/sandbox_fusion_example.rst -------------------------------------------------------------------------------- /docs/experiment/ppo.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/experiment/ppo.rst -------------------------------------------------------------------------------- /docs/faq/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/faq/faq.rst -------------------------------------------------------------------------------- /docs/hybrid_flow.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/hybrid_flow.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/perf/device_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/perf/device_tuning.rst -------------------------------------------------------------------------------- /docs/perf/perf_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/perf/perf_tuning.rst -------------------------------------------------------------------------------- /docs/preparation/prepare_data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/preparation/prepare_data.rst -------------------------------------------------------------------------------- /docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/preparation/reward_function.rst -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/rl_factory/en/centralized_tool_manager.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/en/centralized_tool_manager.md -------------------------------------------------------------------------------- /docs/rl_factory/en/framework_design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/en/framework_design.md -------------------------------------------------------------------------------- /docs/rl_factory/en/main_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/en/main_tutorial.md -------------------------------------------------------------------------------- /docs/rl_factory/en/rewards.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/en/rewards.md -------------------------------------------------------------------------------- /docs/rl_factory/en/tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/en/tools.md -------------------------------------------------------------------------------- /docs/rl_factory/main_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/main_tutorial.md -------------------------------------------------------------------------------- /docs/rl_factory/main_tutorial_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/main_tutorial_zh.md -------------------------------------------------------------------------------- /docs/rl_factory/zh/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/zh/README.md -------------------------------------------------------------------------------- /docs/rl_factory/zh/main_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/zh/main_tutorial.md -------------------------------------------------------------------------------- /docs/rl_factory/zh/rewards.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/zh/rewards.md -------------------------------------------------------------------------------- /docs/rl_factory/zh/tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/rl_factory/zh/tools.md -------------------------------------------------------------------------------- /docs/sglang_multiturn/multiturn.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/sglang_multiturn/multiturn.rst -------------------------------------------------------------------------------- /docs/start/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/start/install.rst -------------------------------------------------------------------------------- /docs/start/multinode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/start/multinode.rst -------------------------------------------------------------------------------- /docs/start/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/start/quickstart.rst -------------------------------------------------------------------------------- /docs/start/ray_debug_tutorial.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/start/ray_debug_tutorial.rst -------------------------------------------------------------------------------- /docs/workers/fsdp_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/workers/fsdp_workers.rst -------------------------------------------------------------------------------- /docs/workers/megatron_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/workers/megatron_workers.rst -------------------------------------------------------------------------------- /docs/workers/ray_trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/workers/ray_trainer.rst -------------------------------------------------------------------------------- /docs/workers/sglang_worker.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/docs/workers/sglang_worker.rst -------------------------------------------------------------------------------- /environments/README_android.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/README_android.md -------------------------------------------------------------------------------- /environments/adv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/adv.sh -------------------------------------------------------------------------------- /environments/android_control.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/android_control.py -------------------------------------------------------------------------------- /environments/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/base.py -------------------------------------------------------------------------------- /environments/env_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_manager.py -------------------------------------------------------------------------------- /environments/env_package/android/ env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/ env_config.py -------------------------------------------------------------------------------- /environments/env_package/android/__init__.py: -------------------------------------------------------------------------------- 1 | from .envs import build_android_envs -------------------------------------------------------------------------------- /environments/env_package/android/android_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/android_env.py -------------------------------------------------------------------------------- /environments/env_package/android/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/base_env.py -------------------------------------------------------------------------------- /environments/env_package/android/base_env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/base_env_config.py -------------------------------------------------------------------------------- /environments/env_package/android/env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/env_config.py -------------------------------------------------------------------------------- /environments/env_package/android/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/envs.py -------------------------------------------------------------------------------- /environments/env_package/android/reward_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/reward_evaluator.py -------------------------------------------------------------------------------- /environments/env_package/android/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/utils/__init__.py -------------------------------------------------------------------------------- /environments/env_package/android/utils/android_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/utils/android_utils.py -------------------------------------------------------------------------------- /environments/env_package/android/utils/context_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/utils/context_utils.py -------------------------------------------------------------------------------- /environments/env_package/android/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/utils/logging_utils.py -------------------------------------------------------------------------------- /environments/env_package/android/utils/parse_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/env_package/android/utils/parse_utils.py -------------------------------------------------------------------------------- /environments/pool_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/pool_api.py -------------------------------------------------------------------------------- /environments/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .android import * -------------------------------------------------------------------------------- /environments/prompts/android.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/prompts/android.py -------------------------------------------------------------------------------- /environments/redis_port.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/redis_port.py -------------------------------------------------------------------------------- /environments/start_emulators.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/environments/start_emulators.sh -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/__init__.py -------------------------------------------------------------------------------- /envs/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/base.py -------------------------------------------------------------------------------- /envs/configs/calculator.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/configs/calculator.json -------------------------------------------------------------------------------- /envs/configs/chat_template.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/configs/chat_template.jinja -------------------------------------------------------------------------------- /envs/configs/mcp_tools.pydata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/configs/mcp_tools.pydata -------------------------------------------------------------------------------- /envs/configs/mcp_vision_tools.pydata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/configs/mcp_vision_tools.pydata -------------------------------------------------------------------------------- /envs/configs/sse_mcp_tools.pydata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/configs/sse_mcp_tools.pydata -------------------------------------------------------------------------------- /envs/mmbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/mmbase.py -------------------------------------------------------------------------------- /envs/reward_rollout_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/reward_rollout_example.py -------------------------------------------------------------------------------- /envs/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/search.py -------------------------------------------------------------------------------- /envs/storage/CacheMe.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/CacheMe.md -------------------------------------------------------------------------------- /envs/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/__init__.py -------------------------------------------------------------------------------- /envs/storage/cache/cache_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/cache/cache_base.py -------------------------------------------------------------------------------- /envs/storage/cache/cachebox_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/cache/cachebox_cache.py -------------------------------------------------------------------------------- /envs/storage/distributed_cache_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/distributed_cache_system.png -------------------------------------------------------------------------------- /envs/storage/manager/storage_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/manager/storage_manager.py -------------------------------------------------------------------------------- /envs/storage/persist/disk_persist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/persist/disk_persist.py -------------------------------------------------------------------------------- /envs/storage/persist/persist_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/persist/persist_base.py -------------------------------------------------------------------------------- /envs/storage/test/storage_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/storage/test/storage_test.py -------------------------------------------------------------------------------- /envs/tool_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/__init__.py -------------------------------------------------------------------------------- /envs/tool_manager/base_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/base_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/centralized/centralized_qwen3_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/centralized/centralized_qwen3_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/config_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/config_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/llama3_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/llama3_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/mm_base_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/mm_base_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/qwen2_5_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/qwen2_5_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/qwen2_5_vl_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/qwen2_5_vl_manager.py -------------------------------------------------------------------------------- /envs/tool_manager/qwen3_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tool_manager/qwen3_manager.py -------------------------------------------------------------------------------- /envs/tools/rotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tools/rotate.py -------------------------------------------------------------------------------- /envs/tools/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/tools/search.py -------------------------------------------------------------------------------- /envs/utils/async_mcp_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/async_mcp_manager.py -------------------------------------------------------------------------------- /envs/utils/concurrency_limiter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/concurrency_limiter.py -------------------------------------------------------------------------------- /envs/utils/get_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/get_prompt.py -------------------------------------------------------------------------------- /envs/utils/mcp_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/mcp_manager.py -------------------------------------------------------------------------------- /envs/utils/mm_tool_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/mm_tool_utils.py -------------------------------------------------------------------------------- /envs/utils/redis_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/redis_cache_manager.py -------------------------------------------------------------------------------- /envs/utils/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/schema.py -------------------------------------------------------------------------------- /envs/utils/suppress_mcp_warnings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/suppress_mcp_warnings.py -------------------------------------------------------------------------------- /envs/utils/tool_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/tool_utils.py -------------------------------------------------------------------------------- /envs/utils/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/utils/util.py -------------------------------------------------------------------------------- /envs/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/envs/vision.py -------------------------------------------------------------------------------- /examples/checkpoint/run_deepseek_megatron_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/checkpoint/run_deepseek_megatron_ckpt.sh -------------------------------------------------------------------------------- /examples/checkpoint/run_qwen_megatron_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/checkpoint/run_qwen_megatron_ckpt.sh -------------------------------------------------------------------------------- /examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/gsm8k_multiturn_w_tool.py -------------------------------------------------------------------------------- /examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /examples/data_preprocess/process_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/data_preprocess/process_vl.py -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/generation/run_deepseek7b_mutli_node.sh -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_deepseek7b_llm_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_deepseek7b_llm_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5-7b_math_megatron_diff_tp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2_5-7b_math_megatron_diff_tp.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen2_5_vl-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/grpo_trainer/run_qwen3-8b.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/naive_chat_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/naive_chat_scheduler.py -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_deepseek_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/verl_getting_started.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ppo_trainer/verl_getting_started.ipynb -------------------------------------------------------------------------------- /examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf.sh -------------------------------------------------------------------------------- /examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf_baseline.sh -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/rl_factory/centralized_tool_manager.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/rl_factory/centralized_tool_manager.sh -------------------------------------------------------------------------------- /examples/rl_factory/qwen25vl_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/rl_factory/qwen25vl_tool.sh -------------------------------------------------------------------------------- /examples/rl_factory/reward_rollout_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/rl_factory/reward_rollout_test.sh -------------------------------------------------------------------------------- /examples/rl_factory/search_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/rl_factory/search_ppo.sh -------------------------------------------------------------------------------- /examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/rloo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_deepseek_6b7.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_gemma_2b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_gemma_7b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_qwen_05_peft.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh -------------------------------------------------------------------------------- /examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sft/multiturn/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sglang_multiturn/README.md -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh -------------------------------------------------------------------------------- /examples/slurm/ray_on_slurm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/slurm/ray_on_slurm.slurm -------------------------------------------------------------------------------- /examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/split_placement/README.md -------------------------------------------------------------------------------- /examples/split_placement/config/ppo_trainer_split.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/split_placement/config/ppo_trainer_split.yaml -------------------------------------------------------------------------------- /examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/32b/qwen2_32B_grpo_8_h20_megatron_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/tuning/32b/qwen2_32B_grpo_8_h20_megatron_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/generator/__init__.py -------------------------------------------------------------------------------- /generator/api_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/generator/api_generator.py -------------------------------------------------------------------------------- /generator/base_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/generator/base_generator.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/install.sh -------------------------------------------------------------------------------- /main_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/main_eval.sh -------------------------------------------------------------------------------- /main_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/main_grpo.sh -------------------------------------------------------------------------------- /main_grpo_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/main_grpo_megatron.sh -------------------------------------------------------------------------------- /main_gspo_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/main_gspo_megatron.sh -------------------------------------------------------------------------------- /main_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/main_ppo.sh -------------------------------------------------------------------------------- /project/test_questions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/project/test_questions.json -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/pyproject.toml -------------------------------------------------------------------------------- /rag_server/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/rag_server/README.md -------------------------------------------------------------------------------- /rag_server/data_process/nq_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/rag_server/data_process/nq_search.py -------------------------------------------------------------------------------- /rag_server/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/rag_server/download.py -------------------------------------------------------------------------------- /rag_server/launch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/rag_server/launch.sh -------------------------------------------------------------------------------- /rag_server/retrieval_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/rag_server/retrieval_server.py -------------------------------------------------------------------------------- /recipe/dapo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/README.md -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/config/dapo_trainer.yaml -------------------------------------------------------------------------------- /recipe/dapo/dapo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/dapo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/dapo/main_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/main_dapo.py -------------------------------------------------------------------------------- /recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/prepare_dapo_data.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_early_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/run_dapo_early_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_0.5b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/run_dapo_qwen2.5_0.5b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/run_dapo_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/dapo/test_dapo_7b.sh -------------------------------------------------------------------------------- /recipe/drgrpo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/drgrpo/README.md -------------------------------------------------------------------------------- /recipe/prime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/__init__.py -------------------------------------------------------------------------------- /recipe/prime/config/prime_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/config/prime_trainer.yaml -------------------------------------------------------------------------------- /recipe/prime/main_prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/main_prime.py -------------------------------------------------------------------------------- /recipe/prime/prime_core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/prime_core_algos.py -------------------------------------------------------------------------------- /recipe/prime/prime_dp_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/prime_dp_rm.py -------------------------------------------------------------------------------- /recipe/prime/prime_fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/prime_fsdp_workers.py -------------------------------------------------------------------------------- /recipe/prime/prime_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/prime_ray_trainer.py -------------------------------------------------------------------------------- /recipe/prime/run_prime_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/prime/run_prime_qwen.sh -------------------------------------------------------------------------------- /recipe/r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/README.md -------------------------------------------------------------------------------- /recipe/r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/__init__.py -------------------------------------------------------------------------------- /recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/config/evaluation.yaml -------------------------------------------------------------------------------- /recipe/r1/data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/data_process.py -------------------------------------------------------------------------------- /recipe/r1/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/main_eval.py -------------------------------------------------------------------------------- /recipe/r1/reward_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/reward_score.py -------------------------------------------------------------------------------- /recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/run_r1_distill_qwen.sh -------------------------------------------------------------------------------- /recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/tasks/__init__.py -------------------------------------------------------------------------------- /recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/tasks/gpqa.py -------------------------------------------------------------------------------- /recipe/r1/tasks/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/tasks/livecodebench.py -------------------------------------------------------------------------------- /recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/r1/tasks/math.py -------------------------------------------------------------------------------- /recipe/sppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/README.md -------------------------------------------------------------------------------- /recipe/sppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/__init__.py -------------------------------------------------------------------------------- /recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/config/sppo_trainer.yaml -------------------------------------------------------------------------------- /recipe/sppo/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/dp_actor.py -------------------------------------------------------------------------------- /recipe/sppo/main_sppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/main_sppo.py -------------------------------------------------------------------------------- /recipe/sppo/run_qwen2.5-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/run_qwen2.5-7b_rm.sh -------------------------------------------------------------------------------- /recipe/sppo/sppo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/sppo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/sppo/sppo_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/sppo/sppo_worker.py -------------------------------------------------------------------------------- /recipe/travelplanner/README_TravelPlanner.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/README_TravelPlanner.md -------------------------------------------------------------------------------- /recipe/travelplanner/env/travelplanner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/env/travelplanner.py -------------------------------------------------------------------------------- /recipe/travelplanner/main_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/main_grpo.sh -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/__init__.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/database/attractions/attractions.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/database/attractions/attractions.csv -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/database/background/citySet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/database/background/citySet.txt -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/database/background/citySet_with_states.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/database/background/citySet_with_states.txt -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/database/background/stateSet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/database/background/stateSet.txt -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/evaluation/__init__.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/evaluation/commonsense_constraint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/evaluation/commonsense_constraint.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/evaluation/custom_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/evaluation/custom_eval.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/evaluation/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/evaluation/eval.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/evaluation/hard_constraint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/evaluation/hard_constraint.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/hard_constraints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/hard_constraints.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/reward_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/reward_test.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/accommodations/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/accommodations/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/accommodations/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/attractions/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/attractions/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/cities/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/cities/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/flights/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/flights/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/flights/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/googleDistanceMatrix/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/googleDistanceMatrix/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/notebook/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/notebook/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/notebook/test.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/planner/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/planner/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/planner/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/planner/env.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/planner/sole_planning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/planner/sole_planning.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/planner/test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/restaurants/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/tools/restaurants/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/tools/restaurants/apis.py -------------------------------------------------------------------------------- /recipe/travelplanner/reward_score/travel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/reward_score/travel.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /recipe/travelplanner/server/accommodation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/accommodation_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/attraction_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/attraction_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/base_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/base_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/distance_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/distance_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/flight_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/flight_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/server/launch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/launch.sh -------------------------------------------------------------------------------- /recipe/travelplanner/server/restaurant_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/server/restaurant_server.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/accommodation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/accommodation.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/attraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/attraction.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/distance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/distance.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/flight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/flight.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/restaurant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/restaurant.py -------------------------------------------------------------------------------- /recipe/travelplanner/tools/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/tools/test.py -------------------------------------------------------------------------------- /recipe/travelplanner/travelplanner_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/recipe/travelplanner/travelplanner_preprocess.py -------------------------------------------------------------------------------- /redis_server/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/redis_server/README.md -------------------------------------------------------------------------------- /redis_server/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/redis_server/client.py -------------------------------------------------------------------------------- /redis_server/start_redis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/redis_server/start_redis.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/requirements_sglang.txt -------------------------------------------------------------------------------- /scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/diagnose.py -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/format.sh -------------------------------------------------------------------------------- /scripts/install_nginx.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/install_nginx.sh -------------------------------------------------------------------------------- /scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/install_vllm_sglang_mcore.sh -------------------------------------------------------------------------------- /scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/model_merger.py -------------------------------------------------------------------------------- /scripts/nq_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/nq_search.py -------------------------------------------------------------------------------- /scripts/run_vllm_with_nginx.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/run_vllm_with_nginx.sh -------------------------------------------------------------------------------- /scripts/vllm_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/scripts/vllm_server.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/setup.py -------------------------------------------------------------------------------- /swift/r1_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift/r1_dataset.py -------------------------------------------------------------------------------- /swift/r1_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift/r1_reward.py -------------------------------------------------------------------------------- /swift/rlfactory_env.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift/rlfactory_env.json -------------------------------------------------------------------------------- /swift/rlfactory_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift/rlfactory_reward.py -------------------------------------------------------------------------------- /swift/rlfactory_toolcall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift/rlfactory_toolcall.py -------------------------------------------------------------------------------- /swift_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/swift_grpo.sh -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/checkpoint/run_deepseek_megatron_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/checkpoint/run_deepseek_megatron_ckpt.sh -------------------------------------------------------------------------------- /tests/checkpoint/run_qwen_megatron_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/checkpoint/run_qwen_megatron_ckpt.sh -------------------------------------------------------------------------------- /tests/checkpoint/test_fsdp_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/checkpoint/test_fsdp_ckpt.py -------------------------------------------------------------------------------- /tests/distributed/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/distributed/run_all.sh -------------------------------------------------------------------------------- /tests/distributed/test_tensor_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /tests/distro/requirements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/distro/requirements.py -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/__init__.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/data/create_dataset.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/model/config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/model/generation_config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/model/tokenizer_config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/rl/README.md -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/main_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/arithmetic_sequence/rl/main_trainer.py -------------------------------------------------------------------------------- /tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/check_custom_rwd_fn.py -------------------------------------------------------------------------------- /tests/e2e/check_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/check_results.py -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/envs/__init__.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/envs/digit_completion/__init__.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/envs/digit_completion/task.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/envs/digit_completion/tokenizer.py -------------------------------------------------------------------------------- /tests/e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/generation/run_gen_qwen05.sh -------------------------------------------------------------------------------- /tests/e2e/ppo_trainer/run_function_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/ppo_trainer/run_function_reward.sh -------------------------------------------------------------------------------- /tests/e2e/ppo_trainer/run_model_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/ppo_trainer/run_model_reward.sh -------------------------------------------------------------------------------- /tests/e2e/run_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_dapo.sh -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_deepseek_grpo.sh -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_grpo_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_deepseek_grpo_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_deepseek_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_megatron_parallelism.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_deepseek_megatron_parallelism.sh -------------------------------------------------------------------------------- /tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh -------------------------------------------------------------------------------- /tests/e2e/run_ppo_trainer_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_ppo_trainer_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_prime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_prime.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen2vl_geo3k_function_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen2vl_geo3k_function_rm.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_grpo.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_grpo_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_grpo_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_custom_function_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_custom_function_rm.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_function_rm.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_model_rm.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_prime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_gsm8k_prime.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron_parallelism.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_qwen_megatron_parallelism.sh -------------------------------------------------------------------------------- /tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_r1_distill_qwen_aime24_eval.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_ray_trainer.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_fire_sampling.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_ray_trainer_fire_sampling.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_ray_trainer_rmpad.sh -------------------------------------------------------------------------------- /tests/e2e/run_sppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_sppo.sh -------------------------------------------------------------------------------- /tests/e2e/run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/run_test.sh -------------------------------------------------------------------------------- /tests/e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/sft/run_sft.sh -------------------------------------------------------------------------------- /tests/e2e/sft/test_sp_loss_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/e2e/sft/test_sp_loss_match.py -------------------------------------------------------------------------------- /tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/generation/run_gen_qwen05.sh -------------------------------------------------------------------------------- /tests/gpu_utility/test_memory_buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/gpu_utility/test_memory_buffers.py -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/gpu_utility/test_ops.py -------------------------------------------------------------------------------- /tests/gpu_utility/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/gpu_utility/test_torch_functional.py -------------------------------------------------------------------------------- /tests/kernels/test_linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/kernels/test_linear_cross_entropy.py -------------------------------------------------------------------------------- /tests/kill_github_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/kill_github_tests.sh -------------------------------------------------------------------------------- /tests/model/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/model/test_transformer.py -------------------------------------------------------------------------------- /tests/model/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/model/test_transformers_ulysses.py -------------------------------------------------------------------------------- /tests/models/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/models/test_transformer.py -------------------------------------------------------------------------------- /tests/models/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/models/test_transformers_ulysses.py -------------------------------------------------------------------------------- /tests/ray/check_worker_alive/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/check_worker_alive/main.py -------------------------------------------------------------------------------- /tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/detached_worker/README.md -------------------------------------------------------------------------------- /tests/ray/detached_worker/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/detached_worker/client.py -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/detached_worker/run.sh -------------------------------------------------------------------------------- /tests/ray/detached_worker/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/detached_worker/server.py -------------------------------------------------------------------------------- /tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_check_worker_alive.py -------------------------------------------------------------------------------- /tests/ray/test_colocated_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_colocated_workers.py -------------------------------------------------------------------------------- /tests/ray/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_data_transfer.py -------------------------------------------------------------------------------- /tests/ray/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_driverfunc_to_worker.py -------------------------------------------------------------------------------- /tests/ray/test_high_level_scheduling_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_high_level_scheduling_api.py -------------------------------------------------------------------------------- /tests/ray/test_ray_local_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_ray_local_envs.py -------------------------------------------------------------------------------- /tests/ray/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_rvdz.py -------------------------------------------------------------------------------- /tests/ray/test_worker_group_basics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_worker_group_basics.py -------------------------------------------------------------------------------- /tests/ray/test_worker_group_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray/test_worker_group_torch.py -------------------------------------------------------------------------------- /tests/ray_cpu/check_worker_alive/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/check_worker_alive/main.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_auto_padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/test_auto_padding.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_check_worker_alive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/test_check_worker_alive.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/test_decorator.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_fused_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/test_fused_workers.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_ray_local_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_cpu/test_ray_local_envs.py -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/detached_worker/README.md -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/detached_worker/client.py -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/detached_worker/run.sh -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/detached_worker/server.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_colocated_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_colocated_workers.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_colocated_workers_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_colocated_workers_fused.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_data_transfer.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_driverfunc_to_worker.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_high_level_scheduling_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_high_level_scheduling_api.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_rvdz.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_worker_group_basics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_worker_group_basics.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_worker_group_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/ray_gpu/test_worker_group_torch.py -------------------------------------------------------------------------------- /tests/reward_score/test_sandbox_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/reward_score/test_sandbox_fusion.py -------------------------------------------------------------------------------- /tests/rl_factory/envs/test_tool_use.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/envs/test_tool_use.py -------------------------------------------------------------------------------- /tests/rl_factory/generator/async_generator_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/generator/async_generator_test.py -------------------------------------------------------------------------------- /tests/rl_factory/generator/async_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/generator/async_results.csv -------------------------------------------------------------------------------- /tests/rl_factory/generator/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/generator/test.sh -------------------------------------------------------------------------------- /tests/rl_factory/rewarder/test_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/rewarder/test_parallel.py -------------------------------------------------------------------------------- /tests/rl_factory/test_async_tool_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/test_async_tool_manager.py -------------------------------------------------------------------------------- /tests/rl_factory/test_llama3_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/test_llama3_manager.py -------------------------------------------------------------------------------- /tests/rl_factory/test_qwen3_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rl_factory/test_qwen3_manager.py -------------------------------------------------------------------------------- /tests/rollout/run_fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rollout/run_fsdp_vllm.py -------------------------------------------------------------------------------- /tests/rollout/test_sglang_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rollout/test_sglang_spmd.py -------------------------------------------------------------------------------- /tests/rollout/test_vllm_hf_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rollout/test_vllm_hf_loader.py -------------------------------------------------------------------------------- /tests/rollout/test_vllm_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/rollout/test_vllm_spmd.py -------------------------------------------------------------------------------- /tests/sandbox/test_sandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sandbox/test_sandbox.py -------------------------------------------------------------------------------- /tests/sanity/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sanity/check_license.py -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sanity/test_import.py -------------------------------------------------------------------------------- /tests/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sft/run_sft.sh -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sft/run_sft_qwen05_peft.sh -------------------------------------------------------------------------------- /tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sft/run_sft_qwen05_sp2_liger.sh -------------------------------------------------------------------------------- /tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sft/run_sft_sp_loss_match.sh -------------------------------------------------------------------------------- /tests/sft/test_sp_loss_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/sft/test_sp_loss_match.py -------------------------------------------------------------------------------- /tests/single_controller/base/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/single_controller/base/test_decorator.py -------------------------------------------------------------------------------- /tests/test_mcp_cache_real.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/test_mcp_cache_real.py -------------------------------------------------------------------------------- /tests/test_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/test_protocol.py -------------------------------------------------------------------------------- /tests/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/trainer/__init__.py -------------------------------------------------------------------------------- /tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /tests/trainer/ppo/test_metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/trainer/ppo/test_metric_utils.py -------------------------------------------------------------------------------- /tests/utility/test_tensor_dict_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utility/test_tensor_dict_utilities.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/cpu_tests/test_fs.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/cpu_tests/test_import_utils.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/cpu_tests/test_model.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/cpu_tests/test_module.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_timeout_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/cpu_tests/test_timeout_decorator.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/checkpoint/test_fsdp_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/checkpoint/test_fsdp_ckpt.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/dataset/test_multiturn_sft_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/dataset/test_rl_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/dataset/test_rm_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/dataset/test_sft_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/test_flops_counter.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/test_seqlen_balancing.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/utils/gpu_tests/test_torch_functional.py -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/verl/utils/dataset/test_rl_dataset.py -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/verl/utils/dataset/test_rm_dataset.py -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/verl/utils/dataset/test_sft_dataset.py -------------------------------------------------------------------------------- /tests/workers/rollout/async_rollout_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/async_rollout_utils.py -------------------------------------------------------------------------------- /tests/workers/rollout/run_fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/run_fsdp_vllm.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_hf_rollout.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_async_rollout_w_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_sglang_async_rollout_w_tools.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_async_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_sglang_async_spmd.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_sglang_spmd.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_hf_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_vllm_hf_loader.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_multi_turn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_vllm_multi_turn.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_vllm_spmd.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_tool_calling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/test_vllm_tool_calling.py -------------------------------------------------------------------------------- /tests/workers/rollout/utils_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/tests/workers/rollout/utils_sglang.py -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/__init__.py -------------------------------------------------------------------------------- /verl/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/base_config.py -------------------------------------------------------------------------------- /verl/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/__init__.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/agent_loop/__init__.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/agent_loop/agent_loop.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/agent_loop/single_turn_agent_loop.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/tool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/agent_loop/tool_agent_loop.py -------------------------------------------------------------------------------- /verl/experimental/agent_loop/tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/agent_loop/tool_parser.py -------------------------------------------------------------------------------- /verl/experimental/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/dataset/__init__.py -------------------------------------------------------------------------------- /verl/experimental/dataset/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/dataset/sampler.py -------------------------------------------------------------------------------- /verl/experimental/dynamic_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/dynamic_dataset/__init__.py -------------------------------------------------------------------------------- /verl/experimental/dynamic_dataset/dynamicgen_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/experimental/dynamic_dataset/dynamicgen_dataset.py -------------------------------------------------------------------------------- /verl/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/interactions/__init__.py -------------------------------------------------------------------------------- /verl/interactions/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/interactions/base.py -------------------------------------------------------------------------------- /verl/interactions/gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/interactions/gsm8k_interaction.py -------------------------------------------------------------------------------- /verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/interactions/utils/__init__.py -------------------------------------------------------------------------------- /verl/interactions/utils/interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/interactions/utils/interaction_registry.py -------------------------------------------------------------------------------- /verl/model_merger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/model_merger/__init__.py -------------------------------------------------------------------------------- /verl/model_merger/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/model_merger/__main__.py -------------------------------------------------------------------------------- /verl/model_merger/base_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/model_merger/base_model_merger.py -------------------------------------------------------------------------------- /verl/model_merger/fsdp_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/model_merger/fsdp_model_merger.py -------------------------------------------------------------------------------- /verl/model_merger/megatron_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/model_merger/megatron_model_merger.py -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/README.md -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_loader.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_saver.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/mbridge.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/model_forward_fused.py -------------------------------------------------------------------------------- /verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /verl/models/mcore/patch_v012.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/patch_v012.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/attention.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/model.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/rope_utils.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/vision_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/vision_config.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/vision_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/vision_model.py -------------------------------------------------------------------------------- /verl/models/mcore/qwen2_5_vl/vision_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py -------------------------------------------------------------------------------- /verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/util.py -------------------------------------------------------------------------------- /verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/registry.py -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /verl/models/transformers/dense_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/dense_common.py -------------------------------------------------------------------------------- /verl/models/transformers/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/kimi_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/npu_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/qwen2_5_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/protocol.py -------------------------------------------------------------------------------- /verl/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/__init__.py -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /verl/third_party/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/torch/__init__.py -------------------------------------------------------------------------------- /verl/third_party/torch/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/torch/distributed/__init__.py -------------------------------------------------------------------------------- /verl/third_party/torch/distributed/_state_dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/torch/distributed/_state_dict_utils.py -------------------------------------------------------------------------------- /verl/third_party/torch/distributed/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/torch/distributed/checkpoint/__init__.py -------------------------------------------------------------------------------- /verl/third_party/torch/distributed/checkpoint/state_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/torch/distributed/checkpoint/state_dict.py -------------------------------------------------------------------------------- /verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/__init__.py -------------------------------------------------------------------------------- /verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/base_tool.py -------------------------------------------------------------------------------- /verl/tools/geo3k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/geo3k_tool.py -------------------------------------------------------------------------------- /verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /verl/tools/mcp_base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/mcp_base_tool.py -------------------------------------------------------------------------------- /verl/tools/mcp_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/mcp_search_tool.py -------------------------------------------------------------------------------- /verl/tools/sandbox_fusion_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/sandbox_fusion_tools.py -------------------------------------------------------------------------------- /verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/schemas.py -------------------------------------------------------------------------------- /verl/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/search_tool.py -------------------------------------------------------------------------------- /verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/utils/__init__.py -------------------------------------------------------------------------------- /verl/tools/utils/mcp_clients/McpClientManager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/utils/mcp_clients/McpClientManager.py -------------------------------------------------------------------------------- /verl/tools/utils/mcp_clients/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/utils/mcp_clients/utils.py -------------------------------------------------------------------------------- /verl/tools/utils/search_r1_like_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/utils/search_r1_like_utils.py -------------------------------------------------------------------------------- /verl/tools/utils/tool_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/tools/utils/tool_registry.py -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/__init__.py -------------------------------------------------------------------------------- /verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/__init__.py -------------------------------------------------------------------------------- /verl/trainer/config/_generated_ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/_generated_ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/_generated_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/_generated_ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/actor/actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/actor/dp_actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/actor/megatron_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/actor/megatron_actor.yaml -------------------------------------------------------------------------------- /verl/trainer/config/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/algorithm.py -------------------------------------------------------------------------------- /verl/trainer/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/config.py -------------------------------------------------------------------------------- /verl/trainer/config/critic/critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/critic/critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/critic/dp_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/critic/dp_critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/critic/megatron_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/critic/megatron_critic.yaml -------------------------------------------------------------------------------- /verl/trainer/config/data/legacy_data.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/data/legacy_data.yaml -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/npu_profile/npu_profile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/npu_profile/npu_profile.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/ref/dp_ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/ref/megatron_ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ref/ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/ref/ref.yaml -------------------------------------------------------------------------------- /verl/trainer/config/reward_model/dp_reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/reward_model/dp_reward_model.yaml -------------------------------------------------------------------------------- /verl/trainer/config/reward_model/megatron_reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/reward_model/megatron_reward_model.yaml -------------------------------------------------------------------------------- /verl/trainer/config/reward_model/reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/reward_model/reward_model.yaml -------------------------------------------------------------------------------- /verl/trainer/config/rl_factory_ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/rl_factory_ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/rl_factory_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/rl_factory_ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/rollout/reward_rollout.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/rollout/reward_rollout.yaml -------------------------------------------------------------------------------- /verl/trainer/config/rollout/rollout.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/rollout/rollout.yaml -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/constants_ppo.py -------------------------------------------------------------------------------- /verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /verl/trainer/main_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/main_evaluate.py -------------------------------------------------------------------------------- /verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/__init__.py -------------------------------------------------------------------------------- /verl/utils/activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/activation_offload.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/megatron_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/checkpoint/megatron_checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/config.py -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /verl/utils/debug/empty_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/debug/empty_annotations.py -------------------------------------------------------------------------------- /verl/utils/debug/nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/debug/nvtx_profile.py -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /verl/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/device.py -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/distributed.py -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/fs.py -------------------------------------------------------------------------------- /verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/import_utils.py -------------------------------------------------------------------------------- /verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/kernel/__init__.py -------------------------------------------------------------------------------- /verl/utils/kernel/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/kernel/kernels.py -------------------------------------------------------------------------------- /verl/utils/kernel/linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/kernel/linear_cross_entropy.py -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/dist_checkpointing.py -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/model.py -------------------------------------------------------------------------------- /verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/net_utils.py -------------------------------------------------------------------------------- /verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/__init__.py -------------------------------------------------------------------------------- /verl/utils/profiler/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/config.py -------------------------------------------------------------------------------- /verl/utils/profiler/empty_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/empty_annotations.py -------------------------------------------------------------------------------- /verl/utils/profiler/mstx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/mstx_profile.py -------------------------------------------------------------------------------- /verl/utils/profiler/nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/nvtx_profile.py -------------------------------------------------------------------------------- /verl/utils/profiler/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/performance.py -------------------------------------------------------------------------------- /verl/utils/profiler/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/profiler/profile.py -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/py_functional.py -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_code/README.md -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /verl/utils/reward_score/sandbox_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/sandbox_fusion/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/sandbox_fusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/sandbox_fusion/utils.py -------------------------------------------------------------------------------- /verl/utils/reward_score/search_r1_like_qa_em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/reward_score/search_r1_like_qa_em.py -------------------------------------------------------------------------------- /verl/utils/rollout_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/rollout_trace.py -------------------------------------------------------------------------------- /verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/tracking.py -------------------------------------------------------------------------------- /verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/ulysses.py -------------------------------------------------------------------------------- /verl/utils/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/utils/vllm_utils.py -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.5.0 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/actor/base.py -------------------------------------------------------------------------------- /verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /verl/workers/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/config/__init__.py -------------------------------------------------------------------------------- /verl/workers/config/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/config/actor.py -------------------------------------------------------------------------------- /verl/workers/config/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/config/engine.py -------------------------------------------------------------------------------- /verl/workers/config/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/config/optimizer.py -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/critic/base.py -------------------------------------------------------------------------------- /verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /verl/workers/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/base.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/fsdp/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/engine_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/fsdp/engine_impl.py -------------------------------------------------------------------------------- /verl/workers/engine/fsdp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/fsdp/utils.py -------------------------------------------------------------------------------- /verl/workers/engine/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/megatron/__init__.py -------------------------------------------------------------------------------- /verl/workers/engine/megatron/engine_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/engine/megatron/engine_impl.py -------------------------------------------------------------------------------- /verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/parallel.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_manager/registry.py -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /verl/workers/roles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/roles/__init__.py -------------------------------------------------------------------------------- /verl/workers/roles/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/roles/actor.py -------------------------------------------------------------------------------- /verl/workers/roles/critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/roles/critic.py -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/async_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /verl/workers/rollout/chat_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/chat_scheduler.py -------------------------------------------------------------------------------- /verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/sglang_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/async_sglang_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/sglang_rollout/async_sglang_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/sglang_rollout/sglang_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/sglang_rollout/utils.py -------------------------------------------------------------------------------- /verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/fsdp_sglang.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/megatron_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/megatron_sglang.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/verl/workers/sharding_manager/megatron_vllm.py -------------------------------------------------------------------------------- /webui/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/README.md -------------------------------------------------------------------------------- /webui/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/app.py -------------------------------------------------------------------------------- /webui/components/flow_editor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/flow_editor.py -------------------------------------------------------------------------------- /webui/components/flow_editor.pyi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/flow_editor.pyi -------------------------------------------------------------------------------- /webui/components/rewards/graders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/rewards/graders/__init__.py -------------------------------------------------------------------------------- /webui/components/rewards/graders/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/rewards/graders/base.py -------------------------------------------------------------------------------- /webui/components/rewards/graders/graders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/rewards/graders/graders.py -------------------------------------------------------------------------------- /webui/components/rewards/graders/qwen_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/components/rewards/graders/qwen_math.py -------------------------------------------------------------------------------- /webui/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/requirements.txt -------------------------------------------------------------------------------- /webui/rewards/reward_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/rewards/reward_config.json -------------------------------------------------------------------------------- /webui/run_webui.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/run_webui.sh -------------------------------------------------------------------------------- /webui/tabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/__init__.py -------------------------------------------------------------------------------- /webui/tabs/data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/data_processing.py -------------------------------------------------------------------------------- /webui/tabs/project_management.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/project_management.py -------------------------------------------------------------------------------- /webui/tabs/reward_definition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/reward_definition.py -------------------------------------------------------------------------------- /webui/tabs/tool_definition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/tool_definition.py -------------------------------------------------------------------------------- /webui/tabs/training_deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/training_deployment.py -------------------------------------------------------------------------------- /webui/tabs/utils/get_env_codes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Simple-Efficient/RL-Factory/HEAD/webui/tabs/utils/get_env_codes.py --------------------------------------------------------------------------------